"""Gradio demo that turns text into speech using a selectable reference voice.

The script loads a Coqui TTS model once at start-up, exposes a small web UI
(text box + speaker dropdown) and writes the synthesized audio to
``output/output.wav``.
"""
import os
import sys
from subprocess import call

import gradio as gr
from TTS.api import TTS
from TTS.utils.synthesizer import Synthesizer

# Longer inputs are truncated to this many characters before synthesis.
MAX_TXT_LEN = 500

# Dropdown label -> reference audio file handed to the model as `speaker_wav`.
# Insertion order is also the order of the choices shown in the UI.
SPEAKER_WAVS = {
    'Speaker-1': 'input/amitabh.mp3',
    'Speaker-2': 'input/amrish.mp3',
    'Speaker-3': 'input/obama.mp3',
    'Speaker-4': 'input/trump.wav',
    'Rock': 'input/sample/Dwayne-Johnson-sample.wav',
    'Elon': 'input/sample/Elon-Sample.wav',
    'Obama': 'input/sample/Obama-sample.wav',
    'Tony': 'input/sample/tony-stark-2.wav',
    'Madara Uchiha': 'input/sample/Madara.mp3',
}

# Reference audio used when the selected label is missing or unknown.
DEFAULT_SPEAKER_WAV = 'input/z-default.wav'

# Where the synthesized audio is written (overwritten on every request).
OUTPUT_WAV = "output/output.wav"

# Loaded once at import time so every request reuses the same model instance.
print("Downloading model...", '')
voiceCloneModel = TTS('tts_models/multilingual/multi-dataset/your_tts')


def run_cmd(command):
    """Echo ``command`` and run it with ``subprocess.call``.

    Args:
        command: Program and arguments, passed to ``subprocess.call``
            unchanged (no shell is requested).

    Exits the process with status 1 if the user interrupts the call with
    Ctrl-C.
    """
    try:
        print(command)
        call(command)
    except KeyboardInterrupt:
        print("Process interrupted")
        # `sys` was previously not imported, so this line raised NameError.
        sys.exit(1)


def inference(text, speaker):
    """Synthesize ``text`` using the reference voice chosen by ``speaker``.

    Args:
        text: Text to speak. Anything beyond ``MAX_TXT_LEN`` characters is
            dropped (a notice is printed).
        speaker: Dropdown label. Labels not present in ``SPEAKER_WAVS``
            (including ``None``) fall back to ``DEFAULT_SPEAKER_WAV``.

    Returns:
        Path of the generated audio file (``output/output.wav``).
    """
    if len(text) > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        print(
            f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")

    speaker_wav = SPEAKER_WAVS.get(speaker, DEFAULT_SPEAKER_WAV)

    # Make sure the target directory exists before the model writes to it.
    os.makedirs(os.path.dirname(OUTPUT_WAV), exist_ok=True)

    voiceCloneModel.tts_to_file(text, speaker_wav=speaker_wav,
                                language="en", file_path=OUTPUT_WAV)
    return OUTPUT_WAV


# Choices offered in the dropdown, derived from the speaker table.
sampleInput = list(SPEAKER_WAVS)

inputs = [
    gr.Textbox(lines=5, label="Input Text"),
    # NOTE(review): recent Gradio releases appear to use `value=` for the
    # initial selection; confirm whether `default=` is still honoured by the
    # pinned Gradio version.
    gr.Dropdown(sampleInput, label="Sample Input", default="Speaker-1",),
]

outputs = gr.Audio(type="filepath", label="Output Audio")

title = "Text To Speech"

description = "An example of using TTS to generate speech from text."

article = ""

examples = [
    ["This is an open-source library that generates synthetic speech"]
]

gr.Interface(
    inference,
    inputs,
    outputs,
    verbose=True,
    title=title,
    description=description,
    article=article,
    examples=examples,
    enable_queue=True,
    allow_flagging="never",
).launch(debug=True)