Spaces:
Paused
Paused
| # Imports | |
| import gradio as gr | |
| import spaces | |
| import torch | |
| import numpy as np | |
| from kokoro import KModel, KPipeline | |
| # Pre-Initialize | |
# Compute device selection. "auto" resolves to CUDA when torch reports a
# usable GPU and to the CPU otherwise; any other value is kept verbatim.
DEVICE = "auto"
if DEVICE == "auto":
    if torch.cuda.is_available():
        DEVICE = "cuda"
    else:
        DEVICE = "cpu"
print(f"[SYSTEM] | Using {DEVICE} type compute device.")
| # Variables | |
# Samples whose absolute amplitude is at or below this value count as silence.
SILENT_THRESHOLD = 0.01
# Maximum number of input characters passed on to synthesis.
CHAR_LIMIT = 2000
DEFAULT_INPUT = ""
DEFAULT_VOICE = "af_heart"

# Dropdown label -> voice id. The first character of a voice id selects the
# pipeline ("a" or "b"); the labels pair those with US and UK flags.
# NOTE(review): the emoji in these labels were restored from mis-decoded
# (mojibake) text; confirm them against the deployed Space.
CHOICES = {
    "🇺🇸 🚺 Heart ❤️": "af_heart",
    "🇺🇸 🚺 Bella 🔥": "af_bella",
    "🇺🇸 🚺 Nicole 🎧": "af_nicole",
    "🇺🇸 🚺 Aoede": "af_aoede",
    "🇺🇸 🚺 Kore": "af_kore",
    "🇺🇸 🚺 Sarah": "af_sarah",
    "🇺🇸 🚺 Nova": "af_nova",
    "🇺🇸 🚺 Sky": "af_sky",
    "🇺🇸 🚺 Alloy": "af_alloy",
    "🇺🇸 🚺 Jessica": "af_jessica",
    "🇺🇸 🚺 River": "af_river",
    "🇺🇸 🚹 Michael": "am_michael",
    "🇺🇸 🚹 Fenrir": "am_fenrir",
    "🇺🇸 🚹 Puck": "am_puck",
    "🇺🇸 🚹 Echo": "am_echo",
    "🇺🇸 🚹 Eric": "am_eric",
    "🇺🇸 🚹 Liam": "am_liam",
    "🇺🇸 🚹 Onyx": "am_onyx",
    "🇺🇸 🚹 Santa": "am_santa",
    "🇺🇸 🚹 Adam": "am_adam",
    "🇬🇧 🚺 Emma": "bf_emma",
    "🇬🇧 🚺 Isabella": "bf_isabella",
    "🇬🇧 🚺 Alice": "bf_alice",
    "🇬🇧 🚺 Lily": "bf_lily",
    "🇬🇧 🚹 George": "bm_george",
    "🇬🇧 🚹 Fable": "bm_fable",
    "🇬🇧 🚹 Lewis": "bm_lewis",
    "🇬🇧 🚹 Daniel": "bm_daniel",
}
# One pipeline per language code. The code is also the first character of
# every voice id in CHOICES, which is how a voice is routed to its pipeline.
# The pipelines are built with model=False; synthesis goes through the single
# shared MODEL defined below.
PIPELINES = {lang: KPipeline(lang_code=lang, model=False) for lang in "ab"}

# Pin the pronunciation of "kokoro" in each pipeline's lexicon.
# NOTE(review): these phoneme strings were restored from mis-decoded
# (mojibake) text; confirm against the upstream Kokoro demo.
PIPELINES["a"].g2p.lexicon.golds["kokoro"] = "kˈOkəɹO"
PIPELINES["b"].g2p.lexicon.golds["kokoro"] = "kˈQkəɹQ"

# Load every selectable voice up front rather than on the first request.
for voice_id in CHOICES.values():
    PIPELINES[voice_id[0]].load_voice(voice_id)

# NOTE(review): DEVICE is resolved earlier in this file but never applied
# here, so the model stays on whatever device KModel() defaults to.
MODEL = KModel().eval()
# Stylesheet handed to gr.Blocks: caps the container width at 560px,
# centers h1 headings and hides the page footer.
css = (
    "\n"
    ".gradio-container{max-width: 560px !important}\n"
    "h1{text-align:center}\n"
    "footer {\n"
    "visibility: hidden\n"
    "}\n"
)
| # Functions | |
def trim_silence(audio, threshold=SILENT_THRESHOLD):
    """Cut quiet samples off both ends of ``audio``.

    Args:
        audio: NumPy array of samples.
        threshold: A sample counts as audible when its absolute value is
            strictly greater than this.

    Returns:
        The slice running from the first audible sample through the last
        one, or ``audio`` unchanged when no sample exceeds ``threshold``.
    """
    audible = np.nonzero(np.abs(audio) > threshold)[0]
    if audible.size == 0:
        # Nothing rises above the threshold: hand the input back as-is.
        return audio
    first, last = audible[0], audible[-1]
    return audio[first:last + 1]
def generate(text=DEFAULT_INPUT, voice=DEFAULT_VOICE, speed=1):
    """Synthesize speech for ``text`` and return it as a Gradio audio value.

    Args:
        text: Text to speak. Surrounding whitespace is stripped, the text is
            cut to CHAR_LIMIT characters and a period is appended.
        voice: Voice id such as "af_heart"; its first character selects the
            pipeline in PIPELINES.
        speed: Speed factor forwarded to both the pipeline and the model.

    Returns:
        A ``(24000, samples)`` tuple covering every segment the pipeline
        produced, with silence trimmed from both ends, or ``None`` when
        there is nothing to synthesize.
    """
    text = (text or "").strip()[:CHAR_LIMIT]
    if not text:
        # Empty input: skip synthesis instead of voicing a lone ".".
        return None
    text += "."

    pipeline = PIPELINES[voice[0]]
    pack = pipeline.load_voice(voice)

    # The pipeline may split the text into several segments. Collect all of
    # them; returning inside the loop would drop everything after the first.
    segments = []
    for _, ps, _ in pipeline(text, voice, speed):
        if not ps:
            # An empty phoneme string would index pack[-1]; skip it.
            continue
        ref_s = pack[len(ps) - 1]
        segments.append(MODEL(ps, ref_s, speed).numpy())

    if not segments:
        return None
    return (24000, trim_silence(np.concatenate(segments)))
def cloud():
    """Print a maintenance message; bound to the ``maintain`` button."""
    message = "[CLOUD] | Space maintained."
    print(message)
def gpu():
    """Do nothing and return ``None``; not referenced by the visible code."""
    return None
| # Initialize | |
# Build the single-page UI: a tagline, the input controls, and the audio
# player that receives the result of generate().
# NOTE(review): the emoji in the markdown and button labels were restored
# from mis-decoded (mojibake) text; confirm against the deployed Space.
with gr.Blocks(css=css) as main:
    with gr.Column():
        gr.Markdown("🪄 Instantly generate realistic voices using text input.")
    with gr.Column():
        # Named text_input (not `input`) so the builtin is not shadowed.
        text_input = gr.Textbox(lines=1, value=DEFAULT_INPUT, label="Input")
        # (label, value) pairs: the dropdown shows the label and passes the
        # voice id to generate().
        voice_input = gr.Dropdown(list(CHOICES.items()), value=DEFAULT_VOICE, label="Voice")
        speed_input = gr.Slider(minimum=0.5, maximum=2, value=1, step=0.1, label="Speed")
        submit = gr.Button("▶")
        maintain = gr.Button("☁️")
    with gr.Column():
        output = gr.Audio(label="Output")
    submit.click(fn=generate, inputs=[text_input, voice_input, speed_input], outputs=output)
    maintain.click(cloud, inputs=[], outputs=[], queue=False)

main.launch(show_api=True)