|
import gradio as gr |
|
from TTS.api import TTS |
|
|
|
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v1", gpu=True) |
|
|
|
def predict(prompt, language, audio_file_pth): |
|
|
|
tts.tts_to_file(text=prompt, |
|
file_path="output.wav", |
|
speaker_wav=audio_file_pth, |
|
language=language) |
|
|
|
return gr.make_waveform(audio="output.wav",) |
|
|
|
|
|
title = "XTTS: MVP" |
|
|
|
gr.Interface( |
|
fn=predict, |
|
inputs=[ |
|
gr.Textbox(label="Prompt", info = "One or two sentences at a time is better* (max: 10)", placeholder = "It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.",), |
|
gr.Dropdown(choices=["en", "es", "fr", "de", "it", "pt", "pl", "tr", "ru", "nl", "cz", "ar", "zh"], max_choices=1), |
|
gr.Audio(label="Upload Speaker WAV", type="filepath"), |
|
], |
|
outputs=[ |
|
gr.Video(label="Synthesised Speech"), |
|
], |
|
title=title, |
|
).launch(debug=True) |