Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
from TTS.api import TTS
|
| 3 |
-
from fastapi import FastAPI
|
| 4 |
-
from fastapi.responses import FileResponse
|
| 5 |
import uvicorn
|
| 6 |
import time
|
| 7 |
import os
|
|
@@ -10,7 +10,10 @@ import os
|
|
| 10 |
# Configuration
|
| 11 |
# -----------------------------
|
| 12 |
YOURTTS_MODEL = "tts_models/multilingual/multi-dataset/your_tts"
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
| 14 |
OUTPUT_PATH = "output.wav"
|
| 15 |
|
| 16 |
tts = TTS(YOURTTS_MODEL, gpu=False)
|
|
@@ -18,15 +21,16 @@ tts = TTS(YOURTTS_MODEL, gpu=False)
|
|
| 18 |
# -----------------------------
|
| 19 |
# Core synthesis function
|
| 20 |
# -----------------------------
|
| 21 |
-
def synthesize(text: str):
|
| 22 |
-
|
| 23 |
-
if not os.path.exists(
|
| 24 |
-
return None, {"error": f"❌ Speaker file not found: {
|
| 25 |
|
|
|
|
| 26 |
try:
|
| 27 |
tts.tts_to_file(
|
| 28 |
text=text,
|
| 29 |
-
speaker_wav=
|
| 30 |
file_path=OUTPUT_PATH,
|
| 31 |
language="en"
|
| 32 |
)
|
|
@@ -41,7 +45,7 @@ def synthesize(text: str):
|
|
| 41 |
"processing_time_sec": round(total_time, 3),
|
| 42 |
"real_time_factor": rtf,
|
| 43 |
"model_used": YOURTTS_MODEL,
|
| 44 |
-
"speaker_used": os.path.basename(
|
| 45 |
}
|
| 46 |
return OUTPUT_PATH, info
|
| 47 |
|
|
@@ -51,16 +55,17 @@ def synthesize(text: str):
|
|
| 51 |
app = FastAPI(title="YourTTS FastAPI", description="Text-to-Speech API")
|
| 52 |
|
| 53 |
@app.post("/synthesize")
|
| 54 |
-
async def predict(request:
|
| 55 |
-
|
|
|
|
|
|
|
| 56 |
if not text:
|
| 57 |
-
return {"error": "Missing 'text' field"}
|
| 58 |
|
| 59 |
-
audio_path, info = synthesize(text)
|
| 60 |
if audio_path is None:
|
| 61 |
-
return info
|
| 62 |
|
| 63 |
-
# Use headers to send metadata, file as response
|
| 64 |
headers = {f"x-{k}": str(v) for k, v in info.items()}
|
| 65 |
return FileResponse(audio_path, media_type="audio/wav", filename="output.wav", headers=headers)
|
| 66 |
|
|
@@ -69,9 +74,12 @@ async def predict(request: dict):
|
|
| 69 |
# -----------------------------
|
| 70 |
demo = gr.Interface(
|
| 71 |
fn=synthesize,
|
| 72 |
-
inputs=[
|
|
|
|
|
|
|
|
|
|
| 73 |
outputs=[gr.Audio(type="filepath"), gr.JSON()],
|
| 74 |
-
title="YourTTS Voice Cloning (English Only,
|
| 75 |
allow_flagging="never"
|
| 76 |
)
|
| 77 |
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
from TTS.api import TTS
|
| 3 |
+
from fastapi import FastAPI, Request
|
| 4 |
+
from fastapi.responses import FileResponse, JSONResponse
|
| 5 |
import uvicorn
|
| 6 |
import time
|
| 7 |
import os
|
|
|
|
| 10 |
# Configuration
|
| 11 |
# -----------------------------
|
| 12 |
YOURTTS_MODEL = "tts_models/multilingual/multi-dataset/your_tts"
|
| 13 |
+
SPEAKERS = {
|
| 14 |
+
"male": "speakers/voice1.wav",
|
| 15 |
+
"female": "speakers/voice2.wav"
|
| 16 |
+
}
|
| 17 |
OUTPUT_PATH = "output.wav"
|
| 18 |
|
| 19 |
tts = TTS(YOURTTS_MODEL, gpu=False)
|
|
|
|
| 21 |
# -----------------------------
|
| 22 |
# Core synthesis function
|
| 23 |
# -----------------------------
|
| 24 |
+
def synthesize(text: str, speaker: str = "female"):
|
| 25 |
+
speaker_path = SPEAKERS.get(speaker.lower())
|
| 26 |
+
if not speaker_path or not os.path.exists(speaker_path):
|
| 27 |
+
return None, {"error": f"❌ Speaker file not found: {speaker_path}"}
|
| 28 |
|
| 29 |
+
start_time = time.time()
|
| 30 |
try:
|
| 31 |
tts.tts_to_file(
|
| 32 |
text=text,
|
| 33 |
+
speaker_wav=speaker_path,
|
| 34 |
file_path=OUTPUT_PATH,
|
| 35 |
language="en"
|
| 36 |
)
|
|
|
|
| 45 |
"processing_time_sec": round(total_time, 3),
|
| 46 |
"real_time_factor": rtf,
|
| 47 |
"model_used": YOURTTS_MODEL,
|
| 48 |
+
"speaker_used": os.path.basename(speaker_path),
|
| 49 |
}
|
| 50 |
return OUTPUT_PATH, info
|
| 51 |
|
|
|
|
| 55 |
app = FastAPI(title="YourTTS FastAPI", description="Text-to-Speech API")
|
| 56 |
|
| 57 |
@app.post("/synthesize")
|
| 58 |
+
async def predict(request: Request):
|
| 59 |
+
data = await request.json()
|
| 60 |
+
text = data.get("text")
|
| 61 |
+
speaker = data.get("speaker", "female")
|
| 62 |
if not text:
|
| 63 |
+
return JSONResponse({"error": "Missing 'text' field"}, status_code=400)
|
| 64 |
|
| 65 |
+
audio_path, info = synthesize(text, speaker)
|
| 66 |
if audio_path is None:
|
| 67 |
+
return JSONResponse(info, status_code=500)
|
| 68 |
|
|
|
|
| 69 |
headers = {f"x-{k}": str(v) for k, v in info.items()}
|
| 70 |
return FileResponse(audio_path, media_type="audio/wav", filename="output.wav", headers=headers)
|
| 71 |
|
|
|
|
| 74 |
# -----------------------------
|
| 75 |
demo = gr.Interface(
|
| 76 |
fn=synthesize,
|
| 77 |
+
inputs=[
|
| 78 |
+
gr.Textbox(label="Text"),
|
| 79 |
+
gr.Dropdown(choices=["male", "female"], value="female", label="Speaker")
|
| 80 |
+
],
|
| 81 |
outputs=[gr.Audio(type="filepath"), gr.JSON()],
|
| 82 |
+
title="YourTTS Voice Cloning (English Only, Select Speaker)",
|
| 83 |
allow_flagging="never"
|
| 84 |
)
|
| 85 |
|