"""Gradio demo that synthesises Kinyarwanda speech with a YourTTS model.

On start-up the model repository is fetched from the Hugging Face Hub (if it
is not already present locally) and a simple text-to-audio web interface is
launched.
"""

import functools
import os
import subprocess
import tempfile
from typing import Optional

import gradio as gr
import numpy as np
from TTS.utils.synthesizer import Synthesizer

MAX_TXT_LEN = 1000

MODEL_REPO_URL = "https://huggingface.co/DigitalUmuganda/Kinyarwanda_YourTTS"
MODEL_DIR = "Kinyarwanda_YourTTS"


def _download_model() -> None:
    """Clone the model repository unless a local checkout already exists."""
    subprocess.run(["git", "lfs", "install"], check=True)
    # `git clone` fails when the destination directory already exists, which
    # would crash every restart after the first one; skip it in that case.
    if not os.path.isdir(MODEL_DIR):
        subprocess.run(["git", "clone", MODEL_REPO_URL], check=True)


_download_model()


@functools.lru_cache(maxsize=1)
def _get_synthesizer() -> Synthesizer:
    """Build the synthesizer once and reuse it for every request.

    Constructing a ``Synthesizer`` loads the model and speaker-encoder
    checkpoints from disk, so doing it per request is needlessly slow.
    """
    return Synthesizer(
        os.path.join(MODEL_DIR, "model.pth"),
        os.path.join(MODEL_DIR, "config.json"),
        tts_speakers_file=os.path.join(MODEL_DIR, "speakers.pth"),
        encoder_checkpoint=os.path.join(MODEL_DIR, "SE_checkpoint.pth.tar"),
        encoder_config=os.path.join(MODEL_DIR, "config_se.json"),
    )


def generate_audio(text: str) -> str:
    """Synthesise *text* and return the path of the resulting WAV file.

    Input longer than ``MAX_TXT_LEN`` characters is truncated to that length
    before synthesis. The audio is written to a temporary ``.wav`` file that
    is not deleted automatically, so that the caller (Gradio) can serve it.
    """
    if len(text) > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")

    synthesizer = _get_synthesizer()
    wav = synthesizer.tts(
        text,
        speaker_wav=os.path.join(MODEL_DIR, "conditioning_audio.wav"),
    )

    # delete=False: the file must outlive this function so it can be served.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        synthesizer.save_wav(wav, fp)
    return fp.name


iface = gr.Interface(
    fn=generate_audio,
    inputs=[
        gr.inputs.Textbox(
            label="Input Text",
            default="Muraho neza! nizere ko umunsi uri kugenda neza.",
        ),
    ],
    outputs=gr.outputs.Audio(type="filepath", label="Output"),
    title="Kinyarwanda tts Demo",
    description="Kinyarwanda tts build By Digital Umuganda, using the Bible dataset, and trained on YourTTS model.",
    allow_flagging=False,
    flagging_options=['error', 'bad-quality', 'wrong-pronounciation'],
    layout="vertical",
    live=False
)

iface.launch(share=False)