"""Gradio app for voice cloning and speech synthesis with a Coqui TTS model.

Settings are read from the YAML file named by the ``CONFIG_PATH`` environment
variable. The keys this script reads are all under ``inference``: ``model``,
``speaker_wav``, ``file_path`` and ``language``.
"""

###################################### imports ######################################

import torch
from TTS.api import TTS
import gradio as gr
import os
import spaces
import yaml


###################################### utilities ######################################

def get_config():
    """Load the YAML configuration referenced by ``CONFIG_PATH``.

    Returns:
        The parsed YAML document (whatever ``yaml.safe_load`` produces).

    Raises:
        KeyError: if the ``CONFIG_PATH`` environment variable is not set.
        OSError: if the file cannot be opened.
    """
    # get config path
    config_path = os.environ["CONFIG_PATH"]

    # Parse the YAML file; be explicit about the encoding so the result does
    # not depend on the platform's default locale.
    with open(config_path, 'r', encoding="utf-8") as file:
        return yaml.safe_load(file)


def init_TTS(config):
    """Create the TTS model named in ``config`` and move it to a device.

    Args:
        config: parsed configuration; ``config['inference']['model']`` is
            passed to the ``TTS`` constructor.

    Returns:
        The ``TTS`` instance, moved to ``"cuda"`` when
        ``torch.cuda.is_available()`` is true and to ``"cpu"`` otherwise.
    """
    # Get device
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Initialize the TTS model
    return TTS(config['inference']['model']).to(device)


@spaces.GPU
def generate_speech(voice_choice, markdown, microphone, text):
    """Synthesize ``text`` into an audio file and return that file's path.

    NOTE: this function reads the module-level names ``config`` and ``tts``,
    which are assigned in the ``__main__`` section at the bottom of the file.

    Args:
        voice_choice: value of the radio input; ``"Record"`` selects the
            microphone recording, anything else selects the configured
            ``speaker_wav``.
        markdown: value of the Markdown input component; unused, it is only
            present because the component is listed among the inputs.
        microphone: file path of the recording (the Audio input is created
            with ``type="filepath"``); may be empty when nothing was recorded.
        text: the text to speak.

    Returns:
        ``config['inference']['file_path']``, the file the speech was
        written to.

    Raises:
        gr.Error: if "Record" is selected without a recording, or if
            ``text`` is empty or whitespace-only.
    """
    inference_cfg = config['inference']

    # Pick the reference voice.
    if voice_choice == "Record":
        # Fail early with a readable message instead of handing ``None`` to
        # the TTS backend as the speaker sample.
        if not microphone:
            raise gr.Error(
                "No recording found. Record a voice sample or select the "
                "predefined voice."
            )
        speaker = microphone
    else:
        speaker = inference_cfg['speaker_wav']

    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")

    # Generate speech using the provided text, speaker voice, and language.
    # NOTE(review): every request writes to the same configured output path.
    output_path = inference_cfg['file_path']
    tts.tts_to_file(
        text=text,
        file_path=output_path,
        speaker_wav=speaker,
        language=inference_cfg['language'],
    )

    return output_path


###################################### main ######################################

def UI(config):
    """Build the Gradio interface around ``generate_speech`` and launch it.

    Args:
        config: accepted for symmetry with the other helpers; not read here.

    Returns:
        ``0`` once ``demo.launch()`` returns.
    """
    # gradio elements
    voice_choice = gr.Radio(
        label="Record or use a predefined voice.",
        choices=["Record", "Predefined (Nancy)"],
        value="Record",
    )
    # The Markdown component is passed as an "input" purely so the recording
    # instructions are shown alongside the other controls.
    markdown = gr.Markdown(
        "If recording, speak loud and clearly. Recommended speaking track "
        "'*printing, in the only sense with which we are at present "
        "concerned, differs from, most if not all, the arts and crafts in "
        "the exhibition.*'"
    )
    microphone = gr.Audio(
        label="Audio",
        sources="microphone",
        type="filepath",
        elem_id='audio',
    )
    enter_text = gr.Textbox(label="Enter your text")

    # Create the Gradio interface
    demo = gr.Interface(
        fn=generate_speech,
        inputs=[
            voice_choice,
            markdown,
            microphone,
            enter_text,
        ],
        outputs="audio",
        title="Voice cloning and Synthesis with Coqui-XTTS",
        description=(
            "Clone your voice and Synthesize speech using predefined target "
            "voice and language. \n"
            "It takes a 10-20 seconds to download the model, so wait to "
            "record until the app is *Running on Zero* to begin."
        ),
    )

    # Launch the interface
    demo.launch()

    return 0


###################################### Execute ######################################

if __name__ == "__main__":
    # ``config`` and ``tts`` are deliberately module-level names:
    # ``generate_speech`` looks them up as globals when Gradio calls it.

    # Get config
    config = get_config()

    # initialize TTS
    tts = init_TTS(config)

    # run program
    UI(config)