Spaces:
Running
Running
| """ | |
| 🎙️ Kokoro-TTS-only demo – Zero-GPU edition | |
| Routes every synthesis to an idle A100. | |
| """ | |
| import os, tempfile, subprocess, numpy as np | |
| import gradio as gr | |
| import spaces # Zero-GPU decorator | |
| import soundfile as sf | |
| # ------------------------------------------------------------------ | |
| # 1. Lazy Kokoro loader (runs once per GPU worker) | |
| # ------------------------------------------------------------------ | |
# Module-level cache for the Kokoro pipeline; stays None until first use.
kokoro_pipe = None


def load_kokoro():
    """Return the shared Kokoro pipeline, constructing it on the first call.

    The pipeline is stored in the module-level ``kokoro_pipe`` so later
    calls in the same process reuse the instance instead of rebuilding it.

    Returns:
        The cached ``KPipeline`` instance.
    """
    global kokoro_pipe
    if kokoro_pipe is not None:
        return kokoro_pipe

    # Imported lazily so the kokoro package is only loaded when a
    # synthesis is actually requested.
    from kokoro import KPipeline

    kokoro_pipe = KPipeline(lang_code='a')
    return kokoro_pipe
| # ------------------------------------------------------------------ | |
| # 2. Generation helper | |
| # ------------------------------------------------------------------ | |
def tts_kokoro(text, voice, speed):
    """Synthesise ``text`` with Kokoro and return the complete waveform.

    The pipeline is a generator that may yield several
    ``(graphemes, phonemes, audio)`` results for a single input. All audio
    chunks are collected and joined so that multi-segment text is not cut
    off after the first chunk.

    Args:
        text: The text to speak.
        voice: Kokoro voice identifier forwarded to the pipeline.
        speed: Speaking-rate multiplier forwarded to the pipeline.

    Returns:
        A 1-D ``numpy.ndarray`` holding every generated chunk in order.

    Raises:
        RuntimeError: If the pipeline produced no audio at all.
    """
    pipe = load_kokoro()
    segments = []
    for _graphemes, _phonemes, audio in pipe(text, voice=voice, speed=speed):
        if audio is None:
            # A result without audio contributes nothing to the waveform.
            continue
        if hasattr(audio, "detach"):
            # Tensor-like chunk: move it to host memory before joining.
            audio = audio.detach().cpu().numpy()
        segments.append(np.asarray(audio).reshape(-1))

    if not segments:
        raise RuntimeError("Kokoro generation failed")
    if len(segments) == 1:
        return segments[0]
    return np.concatenate(segments)
| # ------------------------------------------------------------------ | |
| # 3. Zero-GPU entry point | |
| # ------------------------------------------------------------------ | |
@spaces.GPU  # request a Zero-GPU device for the duration of each call
def synthesise(text, voice, speed):
    """Gradio callback: turn ``text`` into a WAV file and return its path.

    Args:
        text: Text entered by the user; must contain non-whitespace.
        voice: Kokoro voice identifier chosen in the UI.
        speed: Speaking-rate multiplier chosen in the UI.

    Returns:
        Path of a temporary ``.wav`` file written at 24000 Hz.

    Raises:
        gr.Error: If ``text`` is empty, ``None`` or only whitespace.
    """
    # `not text` also covers None, which would otherwise crash on .strip().
    if not text or not text.strip():
        raise gr.Error("Please enter some text.")

    wav = tts_kokoro(text, voice=voice, speed=speed)

    # mkstemp returns an open descriptor; close it so soundfile can reopen
    # the path by name.
    fd, tmp = tempfile.mkstemp(suffix='.wav')
    os.close(fd)
    try:
        sf.write(tmp, wav, 24000)
    except Exception:
        # Do not leave an empty or partial temp file behind on failure.
        os.remove(tmp)
        raise
    return tmp
| # ------------------------------------------------------------------ | |
| # 4. Gradio UI | |
| # ------------------------------------------------------------------ | |
# Hide the default Gradio footer.
css = """footer {visibility: hidden}"""

# NOTE(review): the original nesting of this layout was not recoverable;
# the button and audio output are placed below the input row — confirm.
with gr.Blocks(css=css, title="Kokoro TTS – Zero-GPU") as demo:
    gr.Markdown("## 🎙️ Kokoro TTS – Zero-GPU Demo")
    with gr.Row():
        with gr.Column():
            # 'af_mist' and 'af_dusk' were replaced: they do not appear in
            # the published Kokoro-82M voice list, so selecting them would
            # fail when the voice file is fetched.
            # NOTE(review): confirm the final set against VOICES.md.
            voice = gr.Dropdown(
                label="Voice",
                choices=['af_heart', 'af_sky', 'af_bella', 'af_nicole'],
                value='af_heart',
            )
            speed = gr.Slider(0.5, 2.0, 1.0, step=0.1, label="Speed")
        with gr.Column(scale=3):
            text = gr.Textbox(
                label="Text to speak",
                placeholder="Type or paste text here …",
                lines=6, max_lines=12,
            )
    btn = gr.Button("🎧 Synthesise", variant="primary")
    # type="filepath" matches synthesise(), which returns a path to a WAV file.
    audio_out = gr.Audio(label="Generated speech", type="filepath")
    btn.click(synthesise, inputs=[text, voice, speed], outputs=audio_out)
    # Each tip is its own bullet; previously the last line had no list
    # marker and rendered as a continuation of the first item.
    gr.Markdown("### Tips \n"
                "- **Kokoro** – fast, high-quality English TTS \n"
                "- Audio is returned as 24 kHz WAV.")

demo.launch()