# Spaces: Running on Zero
# Imports | |
import gradio as gr | |
import spaces | |
import torch | |
import numpy as np | |
from kokoro import KModel, KPipeline | |
# Pre-Initialize
# "auto" is resolved to a concrete device name: CUDA when torch reports it
# as available, otherwise the CPU.
DEVICE = "auto"
if DEVICE == "auto":
    if torch.cuda.is_available():
        DEVICE = "cuda"
    else:
        DEVICE = "cpu"
print(f"[SYSTEM] | Using {DEVICE} type compute device.")
# Variables
# Samples whose magnitude does not exceed this are treated as silence when
# trimming the generated audio.
SILENT_THRESHOLD = 0.01
# Input text is cut to this many characters before synthesis.
CHAR_LIMIT = 2000
DEFAULT_INPUT = ""
DEFAULT_VOICE = "af_heart"
# Dropdown label -> voice id. The first character of each voice id ("a" or
# "b") is used elsewhere in this file as the key into PIPELINES.
# The labels were stored as mojibake (UTF-8 emoji decoded with a Greek
# single-byte code page); they are restored to the intended emoji here.
CHOICES = {
    "🇺🇸 🚺 Heart ❤️": "af_heart",
    "🇺🇸 🚺 Bella 🔥": "af_bella",
    "🇺🇸 🚺 Nicole 🎧": "af_nicole",
    "🇺🇸 🚺 Aoede": "af_aoede",
    "🇺🇸 🚺 Kore": "af_kore",
    "🇺🇸 🚺 Sarah": "af_sarah",
    "🇺🇸 🚺 Nova": "af_nova",
    "🇺🇸 🚺 Sky": "af_sky",
    "🇺🇸 🚺 Alloy": "af_alloy",
    "🇺🇸 🚺 Jessica": "af_jessica",
    "🇺🇸 🚺 River": "af_river",
    "🇺🇸 🚹 Michael": "am_michael",
    "🇺🇸 🚹 Fenrir": "am_fenrir",
    "🇺🇸 🚹 Puck": "am_puck",
    "🇺🇸 🚹 Echo": "am_echo",
    "🇺🇸 🚹 Eric": "am_eric",
    "🇺🇸 🚹 Liam": "am_liam",
    "🇺🇸 🚹 Onyx": "am_onyx",
    "🇺🇸 🚹 Santa": "am_santa",
    "🇺🇸 🚹 Adam": "am_adam",
    "🇬🇧 🚺 Emma": "bf_emma",
    "🇬🇧 🚺 Isabella": "bf_isabella",
    "🇬🇧 🚺 Alice": "bf_alice",
    "🇬🇧 🚺 Lily": "bf_lily",
    "🇬🇧 🚹 George": "bm_george",
    "🇬🇧 🚹 Fable": "bm_fable",
    "🇬🇧 🚹 Lewis": "bm_lewis",
    "🇬🇧 🚹 Daniel": "bm_daniel",
}
# One pipeline per language code; voices are routed to a pipeline by the
# first character of their id ("a..." or "b...").
# NOTE(review): model=False presumably builds the pipeline without its own
# model so that the single MODEL below does the synthesis — confirm against
# the kokoro documentation.
PIPELINES = {lang: KPipeline(lang_code=lang, model=False) for lang in "ab"}
# Override the lexicon entry for the word "kokoro" in each pipeline.
# These phoneme strings were stored as mojibake (UTF-8 IPA decoded with a
# Greek single-byte code page); they are restored to the intended IPA here.
PIPELINES["a"].g2p.lexicon.golds["kokoro"] = "kˈOkəɹO"
PIPELINES["b"].g2p.lexicon.golds["kokoro"] = "kˈQkəɹQ"
# Load every selectable voice once at start-up.
for v in CHOICES.values():
    PIPELINES[v[0]].load_voice(v)
# NOTE(review): DEVICE is computed above but the model is not moved to it
# here — confirm whether CPU inference is intended.
MODEL = KModel().eval()
# Page styling: narrow the container, centre h1 headings, hide the footer.
css = '''
.gradio-container{max-width: 560px !important}
h1{text-align:center}
footer {
visibility: hidden
}
'''
# Functions
def trim_silence(audio, threshold=SILENT_THRESHOLD):
    """Cut leading and trailing quiet samples from ``audio``.

    A sample counts as audible when its absolute value is strictly greater
    than ``threshold``.

    Args:
        audio: NumPy array of samples (assumed 1-D — TODO confirm with caller).
        threshold: Magnitude a sample must exceed to be kept as an edge.

    Returns:
        The slice of ``audio`` from the first audible sample through the last
        audible sample, or ``audio`` unchanged when no sample is audible.
    """
    audible = np.nonzero(np.abs(audio) > threshold)[0]
    if audible.size == 0:
        # Nothing rises above the threshold, so there is no edge to trim to.
        return audio
    first, last = audible[0], audible[-1]
    return audio[first:last + 1]
def generate(text=DEFAULT_INPUT, voice=DEFAULT_VOICE, speed=1):
    """Synthesize ``text`` with the selected voice and return playable audio.

    The input is stripped, cut to ``CHAR_LIMIT`` characters and terminated
    with a period. Every chunk the pipeline yields is synthesized and the
    pieces are joined, so long inputs are not reduced to a single chunk.

    Args:
        text: Text to speak. ``None`` is treated as an empty string.
        voice: Voice id; its first character selects the entry in PIPELINES.
        speed: Speed value forwarded to the pipeline and to the model.

    Returns:
        ``(24000, samples)`` where ``samples`` is a NumPy array with leading
        and trailing silence trimmed, or ``None`` when the pipeline produced
        nothing to synthesize.
    """
    sample_rate = 24000
    # Guard against a None payload (e.g. from an API call) before .strip().
    text = (text or "").strip()[:CHAR_LIMIT] + "."
    pipeline = PIPELINES[voice[0]]
    pack = pipeline.load_voice(voice)
    pieces = []
    for _, ps, _ in pipeline(text, voice, speed):
        if not ps:
            # An empty phoneme string would index pack[-1]; skip it instead.
            continue
        # The voice pack is indexed by phoneme count minus one
        # (presumably one reference style per length — TODO confirm).
        ref_s = pack[len(ps) - 1]
        audio = MODEL(ps, ref_s, speed)
        # detach/cpu keep the NumPy conversion valid wherever the tensor lives;
        # reshape(-1) guarantees a 1-D piece for concatenation.
        pieces.append(audio.detach().cpu().numpy().reshape(-1))
    if not pieces:
        return None
    return (sample_rate, trim_silence(np.concatenate(pieces)))
def cloud():
    """Print a keep-alive log line; takes no input and returns nothing."""
    message = "[CLOUD] | Space maintained."
    print(message)
def gpu():
    """Do nothing and return ``None``.

    NOTE(review): ``spaces`` is imported by this file but no decorator from it
    is applied here — confirm whether this placeholder was meant to carry one.
    """
    return None
# Initialize
# Build the page: a tagline, the input controls, and the audio output.
# The visible labels were stored as mojibake and are restored to the
# intended emoji here.
with gr.Blocks(css=css) as main:
    with gr.Column():
        gr.Markdown("🪄 Instantly generate realistic voices using text input.")
    with gr.Column():
        # Named text_input (not `input`) so the builtin is not shadowed.
        text_input = gr.Textbox(lines=1, value=DEFAULT_INPUT, label="Input")
        # Choices are (label, voice id) pairs; the voice id is what
        # generate() receives.
        voice_input = gr.Dropdown(list(CHOICES.items()), value=DEFAULT_VOICE, label="Voice")
        speed_input = gr.Slider(minimum=0.5, maximum=2, value=1, step=0.1, label="Speed")
        submit = gr.Button("▶")
        maintain = gr.Button("☁️")
    with gr.Column():
        output = gr.Audio(label="Output")
    # Event listeners must be registered while the Blocks context is open.
    submit.click(fn=generate, inputs=[text_input, voice_input, speed_input], outputs=output)
    # The maintenance button only logs a line, so it bypasses the queue.
    maintain.click(cloud, inputs=[], outputs=[], queue=False)
main.launch(show_api=True)