# Hugging Face Space (status: Running)
import os
import tempfile

import gradio as gr
import torch
from huggingface_hub import hf_hub_download
from TTS.api import TTS
# True when torch can see a CUDA device; used to pick the device for every model.
CUDA = torch.cuda.is_available()

# Hugging Face Hub repository the model files are downloaded from.
REPO_ID = "collectivat/catotron-ona"

# Short name shown in the UI -> voice-conversion model name given to the TTS API.
VOICE_CONVERSION_MODELS = {
    "freevc24": "voice_conversion_models/multilingual/vctk/freevc24",
    "openvoice_v1": "voice_conversion_models/multilingual/multi-dataset/openvoice_v1",
    "openvoice_v2": "voice_conversion_models/multilingual/multi-dataset/openvoice_v2",
}

# Texts displayed at the top of the interface.
my_title = "Catotron Text-to-Speech with Voice Conversion"
my_description = (
    "This space allows speaker conversion on Fast Speech based 🐸 "
    "[Catotron](https://huggingface.co/collectivat/catotron-ona)."
)

# One row per example: [input text, split sentences, speaker audio, voice conversion model].
my_examples = [
    [
        "Catotron, síntesi de la parla obert i lliure en català.",
        True,
        None,
        "freevc24",
    ],
    [
        "Leonor Ferrer Girabau va ser una delineant, mestra i activista "
        "barcelonina, nascuda al carrer actual de la Concòrdia del Poble-sec, "
        "que es va convertir en la primera dona a obtenir el títol de "
        "delineant a Catalunya i a l'estat.",
        True,
        None,
        "freevc24",
    ],
    [
        "S'espera un dia anticiclònic amb temperatures suaus i vent fluix.",
        False,
        None,
        "freevc24",
    ],
]
# Input components, listed in the positional order of tts()'s parameters:
# text, split_sentences, speaker_wav, voice_cv_model.
my_inputs = [
    gr.Textbox(lines=5, label="Input Text"),
    gr.Checkbox(label="Split Sentences", value=False),
    gr.Audio(type="filepath", label="Speaker audio for voice cloning (optional)"),
    gr.Dropdown(
        label="Voice Conversion Model",
        choices=list(VOICE_CONVERSION_MODELS),
        # Preselect the same model tts() defaults to. Without an explicit
        # value the dropdown may submit None (Gradio-version dependent), and
        # None is not a key of VOICE_CONVERSION_MODELS.
        value="freevc24",
    ),
]

# Synthesized audio is handed back as a file path and played automatically.
my_outputs = gr.Audio(type="filepath", label="Output Audio", autoplay=True)
# Download the acoustic-model and vocoder files (checkpoint + config for each)
# from the Hub repository, in this order.
best_model_path, config_path, vocoder_model, vocoder_config = (
    hf_hub_download(repo_id=REPO_ID, filename=filename)
    for filename in (
        "fast-speech_best_model.pth",
        "fast-speech_config.json",
        "ljspeech--hifigan_v2_model_file.pth",
        "ljspeech--hifigan_v2_config.json",
    )
)

# Build the synthesizer from the downloaded files and move it to the chosen device.
api = TTS(
    model_path=best_model_path,
    config_path=config_path,
    vocoder_path=vocoder_model,
    vocoder_config_path=vocoder_config,
)
api = api.to("cuda" if CUDA else "cpu")

# pre-download voice conversion models by loading each one once at start-up
for vc_model_name in VOICE_CONVERSION_MODELS.values():
    api.load_vc_model_by_name(vc_model_name, gpu=CUDA)
# Out-of-vocabulary characters and the in-vocabulary text that replaces them.
_OOV_REPLACEMENTS = str.maketrans({"\n": ". ", "(": ",", ")": ",", ";": ","})


def tts(text: str, split_sentences: bool = False, speaker_wav: str = None, voice_cv_model: str = 'freevc24'):
    """Synthesize *text* to a WAV file and return the file's path.

    Args:
        text: Text to speak. Newlines, parentheses and semicolons are
            replaced before synthesis.
        split_sentences: Forwarded unchanged to the TTS API call.
        speaker_wav: Optional path to a reference recording. When given, the
            output goes through voice conversion towards that speaker.
        voice_cv_model: Key of ``VOICE_CONVERSION_MODELS`` to use for voice
            conversion. ``None`` or an empty value falls back to
            ``'freevc24'``. Ignored when ``speaker_wav`` is not given.

    Returns:
        Path of the generated ``.wav`` file. The file is created with
        ``delete=False``, so it is not removed automatically.

    Raises:
        KeyError: If ``voice_cv_model`` is a non-empty value that is not a
            key of ``VOICE_CONVERSION_MODELS``.
    """
    # replace oov characters (single pass, same result as chained replaces)
    text = text.translate(_OOV_REPLACEMENTS)

    # Only a unique path is needed. Close the handle before the TTS API writes
    # to that path: an open NamedTemporaryFile cannot be reopened on Windows.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        output_path = fp.name

    try:
        if speaker_wav:
            # The dropdown can submit None when nothing is selected.
            model_name = VOICE_CONVERSION_MODELS[voice_cv_model or 'freevc24']
            api.load_vc_model_by_name(model_name, gpu=CUDA)
            api.tts_with_vc_to_file(
                text,
                speaker_wav=speaker_wav,
                file_path=output_path,
                split_sentences=split_sentences,
            )
        else:
            api.tts_to_file(text, file_path=output_path, split_sentences=split_sentences)
    except Exception:
        # The temp file was created with delete=False; do not leave an
        # orphaned file behind when synthesis fails.
        try:
            os.remove(output_path)
        except OSError:
            pass  # best-effort cleanup; the original error is what matters
        raise

    return output_path
# Wire the synthesis function to its input/output components. Example outputs
# are cached rather than recomputed on every click.
iface = gr.Interface(
    tts,
    my_inputs,
    my_outputs,
    examples=my_examples,
    cache_examples=True,
    title=my_title,
    description=my_description,
)

# Start serving the interface.
iface.launch()