# Text2Audio / app.py — IFMedTechdemo, commit 43d1773 (verified): "Update app.py", 2.69 kB
"""
🎙️ Kokoro-TTS-only demo – Zero-GPU edition
Routes every synthesis to an idle A100.
"""
import os, tempfile, subprocess, numpy as np
import gradio as gr
import spaces # Zero-GPU decorator
import soundfile as sf
# ------------------------------------------------------------------
# 1. Lazy Kokoro loader (runs once per GPU worker)
# ------------------------------------------------------------------
# Module-level cache for the pipeline; stays None until load_kokoro() runs.
kokoro_pipe = None


def load_kokoro():
    """Return the cached Kokoro pipeline, constructing it on first use.

    The ``kokoro`` import is deferred into this function, so the package is
    only imported when the pipeline has not been built yet. The pipeline is
    created with ``lang_code='a'`` and stored in the module-level
    ``kokoro_pipe`` so later calls reuse the same object.
    """
    global kokoro_pipe
    # Fast path: something is already cached, hand it straight back.
    if kokoro_pipe is not None:
        return kokoro_pipe

    from kokoro import KPipeline

    kokoro_pipe = KPipeline(lang_code='a')
    return kokoro_pipe
# ------------------------------------------------------------------
# 2. Generation helper
# ------------------------------------------------------------------
@spaces.GPU
def tts_kokoro(text, voice, speed):
    """Synthesise *text* with Kokoro and return the complete waveform.

    The pipeline call is iterated to exhaustion. Every iteration yields a
    ``(gs, ps, audio)`` triple; all audio parts are flattened and joined in
    the order they were yielded, so nothing after the first part is lost
    when the pipeline produces more than one.

    Args:
        text: Text to speak.
        voice: Voice identifier, forwarded to the pipeline as ``voice=``.
        speed: Speed value, forwarded to the pipeline as ``speed=``.

    Returns:
        A 1-D NumPy array holding every yielded audio part, concatenated.

    Raises:
        RuntimeError: If the pipeline yields no audio at all.
    """
    pipe = load_kokoro()

    segments = []
    for _gs, _ps, audio in pipe(text, voice=voice, speed=speed):
        if audio is None:
            # Nothing to append for this item; keep going instead of failing.
            continue
        # NOTE(review): audio is presumably a torch tensor — confirm against
        # the Kokoro version in use. Tensors are moved to host memory first;
        # anything else goes through np.asarray unchanged.
        if hasattr(audio, "detach"):
            audio = audio.detach().cpu().numpy()
        segments.append(np.asarray(audio).reshape(-1))

    if not segments:
        raise RuntimeError("Kokoro generation failed")
    return np.concatenate(segments)
# ------------------------------------------------------------------
# 3. Zero-GPU entry point
# ------------------------------------------------------------------
@spaces.GPU
def synthesise(text, voice, speed):
    """Turn *text* into speech and return the path of the written WAV file.

    Args:
        text: Text to speak. ``None``, empty, or whitespace-only input is
            rejected.
        voice: Voice identifier passed through to ``tts_kokoro``.
        speed: Speed value passed through to ``tts_kokoro``.

    Returns:
        Path of a temporary ``.wav`` file written at a 24000 Hz sample rate.
        The file is intentionally left on disk because the caller receives
        only its path.

    Raises:
        gr.Error: If no usable text was supplied.
    """
    # Treat None like empty text so the user sees the friendly error rather
    # than an AttributeError from calling .strip() on None.
    if not text or not text.strip():
        raise gr.Error("Please enter some text.")

    wav = tts_kokoro(text, voice=voice, speed=speed)

    # mkstemp returns an open descriptor; close it so sf.write can reopen the
    # file by path.
    fd, tmp = tempfile.mkstemp(suffix='.wav')
    os.close(fd)
    try:
        sf.write(tmp, wav, 24000)
    except Exception:
        # Do not leave an empty or partial temp file behind when the write
        # fails; the original error is re-raised unchanged.
        try:
            os.remove(tmp)
        except OSError:
            pass
        raise
    return tmp
# ------------------------------------------------------------------
# 4. Gradio UI
# ------------------------------------------------------------------
# Stylesheet passed to gr.Blocks: hides <footer> elements.
css = """footer {visibility: hidden}"""

# NOTE(review): this file arrived with its indentation stripped, so the
# container nesting below (which widget sits inside which Row/Column) is a
# reconstruction — confirm against the deployed layout.
with gr.Blocks(css=css, title="Kokoro TTS – Zero-GPU") as demo:
    gr.Markdown("## 🎙️ Kokoro TTS – Zero-GPU Demo")

    with gr.Row():
        # Voice and speed controls.
        with gr.Column():
            voice = gr.Dropdown(
                choices=['af_heart', 'af_sky', 'af_mist', 'af_dusk'],
                value='af_heart',
                label="Voice",
            )
            speed = gr.Slider(
                minimum=0.5,
                maximum=2.0,
                value=1.0,
                step=0.1,
                label="Speed",
            )
        # Text input and the trigger button, in a column with scale=3.
        with gr.Column(scale=3):
            text = gr.Textbox(
                lines=6,
                max_lines=12,
                label="Text to speak",
                placeholder="Type or paste text here …",
            )
            btn = gr.Button(value="🎧 Synthesise", variant="primary")

    audio_out = gr.Audio(type="filepath", label="Generated speech")

    # Clicking the button runs synthesise(text, voice, speed) and feeds the
    # returned file path to the audio component.
    btn.click(fn=synthesise, inputs=[text, voice, speed], outputs=audio_out)

    gr.Markdown(
        "### Tips \n"
        "- **Kokoro** – fast, high-quality English TTS \n"
        "Audio is returned as 24 kHz WAV."
    )

demo.launch()