File size: 2,685 Bytes
1d84a6c
43d1773
1d84a6c
 
 
 
 
43d1773
1d84a6c
 
 
43d1773
1d84a6c
 
 
 
 
43d1773
 
1d84a6c
 
 
 
43d1773
1d84a6c
43d1773
1d84a6c
 
 
 
 
 
 
 
43d1773
1d84a6c
 
43d1773
1d84a6c
 
43d1773
1d84a6c
 
 
 
 
 
43d1773
1d84a6c
43d1773
1d84a6c
43d1773
 
1d84a6c
 
43d1773
 
 
 
 
 
 
1d84a6c
 
43d1773
 
 
 
 
 
1d84a6c
 
43d1773
1d84a6c
 
43d1773
1d84a6c
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
"""
🎙️ Kokoro-TTS-only demo – Zero-GPU edition
Routes every synthesis to an idle A100.
"""

import os, tempfile, subprocess, numpy as np
import gradio as gr
import spaces           # Zero-GPU decorator
import soundfile as sf

# ------------------------------------------------------------------
# 1.  Lazy Kokoro loader (runs once per GPU worker)
# ------------------------------------------------------------------
# Process-wide cache for the Kokoro pipeline; stays None until first use.
kokoro_pipe = None


def load_kokoro():
    """Return the shared Kokoro pipeline, creating it on the first call.

    The instance is stored in the module-level ``kokoro_pipe`` so later
    calls reuse it instead of constructing a new ``KPipeline``.
    """
    global kokoro_pipe
    if kokoro_pipe is not None:
        return kokoro_pipe

    # Deferred import: kokoro is only pulled in when a pipeline is needed.
    from kokoro import KPipeline

    kokoro_pipe = KPipeline(lang_code='a')
    return kokoro_pipe

# ------------------------------------------------------------------
# 2.  Generation helper
# ------------------------------------------------------------------
@spaces.GPU
def tts_kokoro(text, voice, speed):
    """Synthesise *text* with Kokoro and return the complete waveform.

    The pipeline is invoked as ``pipe(text, voice=voice, speed=speed)`` and
    iterated to exhaustion. Each yielded 3-tuple contributes its last item
    (the audio for that segment); the segments are joined in order so that
    input the pipeline splits into several segments is not cut off after
    the first one.

    Args:
        text: Text to speak.
        voice: Voice identifier forwarded to the pipeline.
        speed: Speed value forwarded to the pipeline.

    Returns:
        A 1-D ``numpy.ndarray`` holding the concatenated audio samples.

    Raises:
        RuntimeError: If the pipeline yields no audio at all.
    """
    pipe = load_kokoro()
    segments = []
    for _gs, _ps, audio in pipe(text, voice=voice, speed=speed):
        if audio is None:
            continue
        # NOTE(review): audio is presumably a torch tensor in current Kokoro
        # releases; bring it to host memory before handing it to NumPy.
        if hasattr(audio, "detach"):
            audio = audio.detach().cpu().numpy()
        segments.append(np.asarray(audio).reshape(-1))

    if not segments:
        raise RuntimeError("Kokoro generation failed")
    return np.concatenate(segments)

# ------------------------------------------------------------------
# 3.  Zero-GPU entry point
# ------------------------------------------------------------------
@spaces.GPU
def synthesise(text, voice, speed):
    """Turn *text* into speech and return the path of the written WAV file.

    Args:
        text: Text to speak; ``None``, empty, or whitespace-only input is
            rejected.
        voice: Voice identifier forwarded to ``tts_kokoro``.
        speed: Speed value forwarded to ``tts_kokoro``.

    Returns:
        Path of a temporary ``.wav`` file written at a 24000 Hz sample rate.

    Raises:
        gr.Error: If no usable text was supplied.
    """
    # Guard against None as well as blank strings so the caller gets the
    # friendly Gradio error instead of an AttributeError from ``.strip()``.
    if not text or not text.strip():
        raise gr.Error("Please enter some text.")

    wav = tts_kokoro(text, voice=voice, speed=speed)

    # mkstemp returns an open descriptor; close it so soundfile can reopen
    # the path by name.
    fd, tmp = tempfile.mkstemp(suffix='.wav')
    os.close(fd)
    try:
        sf.write(tmp, wav, 24000)
    except Exception:
        # Do not leave an empty or partial file behind when writing fails.
        os.remove(tmp)
        raise
    return tmp

# ------------------------------------------------------------------
# 4.  Gradio UI
# ------------------------------------------------------------------
# Stylesheet passed to gr.Blocks: hides the page footer.
css = """footer {visibility: hidden}"""

with gr.Blocks(title="Kokoro TTS – Zero-GPU", css=css) as demo:
    gr.Markdown(value="## 🎙️ Kokoro TTS – Zero-GPU Demo")

    with gr.Row():
        # Left column: synthesis settings.
        with gr.Column():
            # NOTE(review): confirm 'af_mist' and 'af_dusk' exist in the
            # published Kokoro voice list; an unknown voice would fail at
            # synthesis time.
            voice = gr.Dropdown(
                choices=['af_heart', 'af_sky', 'af_mist', 'af_dusk'],
                value='af_heart',
                label="Voice",
            )
            speed = gr.Slider(
                minimum=0.5,
                maximum=2.0,
                value=1.0,
                step=0.1,
                label="Speed",
            )

        # Right column (three times as wide): text input, trigger, result.
        with gr.Column(scale=3):
            text = gr.Textbox(
                lines=6,
                max_lines=12,
                label="Text to speak",
                placeholder="Type or paste text here …",
            )
            btn = gr.Button(value="🎧 Synthesise", variant="primary")
            audio_out = gr.Audio(type="filepath", label="Generated speech")

    # Clicking the button runs synthesise(text, voice, speed) and shows the
    # returned file path in the audio player.
    btn.click(
        fn=synthesise,
        inputs=[text, voice, speed],
        outputs=audio_out,
    )

    gr.Markdown(
        value="### Tips  \n"
              "- **Kokoro** – fast, high-quality English TTS  \n"
              "Audio is returned as 24 kHz WAV."
    )

demo.launch()