coqui-tts-cat / app.py
ccoreilly's picture
import & fix
ccb20f4
raw
history blame
2.19 kB
import functools
import json
import os
import tempfile
from typing import Optional

import gradio as gr
import numpy as np
from TTS.config import load_config
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer
# Upper bound on the number of input characters that get synthesized.
MAX_TXT_LEN = 100

# Speaker aliases offered to the user in the dropdown.
SPEAKERS = [
    'f_cen_05',
    'f_cen_81',
    'f_occ_31',
    'f_occ_de',
    'f_sep_31',
    'm_cen_08',
    'm_occ_44',
    'm_val_89',
]
@functools.lru_cache(maxsize=1)
def _load_synthesizer():
    """Build the synthesizer once and reuse it for every later request.

    The checkpoint, config and speakers files are looked up in the current
    working directory. Loading is cached because re-reading the checkpoint
    on each request is expensive; a failed load is not cached, so the next
    call retries.
    """
    base_dir = os.getcwd()
    model_path = os.path.join(base_dir, "best_model.pth")
    config_path = os.path.join(base_dir, "config.json")
    speakers_file_path = os.path.join(base_dir, "speakers.pth")
    # No separate vocoder checkpoint/config is supplied.
    vocoder_path = None
    vocoder_config_path = None
    # Arguments stay positional, exactly as before, so the call keeps
    # matching whichever Synthesizer signature is installed.
    return Synthesizer(
        model_path, config_path, speakers_file_path, None, vocoder_path, vocoder_config_path,
    )


def tts(text: str, speaker_idx: Optional[str] = None) -> str:
    """Synthesize ``text`` with the chosen speaker and return a WAV file path.

    Args:
        text: Text to synthesize. Anything beyond ``MAX_TXT_LEN`` characters
            is dropped (a notice is printed when that happens).
        speaker_idx: Speaker alias; it is translated to the model's speaker
            id through ``speaker_map.json`` in the current working directory.

    Returns:
        Path of a temporary ``.wav`` file holding the audio. The file is
        created with ``delete=False``, so it is not removed automatically.

    Raises:
        KeyError: If ``speaker_idx`` is not a key of the speaker map
            (this includes ``None``).
    """
    if len(text) > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")
    print(text)

    # Resolve the speaker first: it is cheap, and a bad alias should fail
    # before the (expensive) model load is attempted.
    speakers_maping_path = os.path.join(os.getcwd(), "speaker_map.json")
    with open(speakers_maping_path, "r", encoding="utf-8") as fp:
        speaker_map = json.load(fp)
    try:
        speaker_id = speaker_map[speaker_idx]
    except KeyError:
        raise KeyError(
            f"Unknown speaker {speaker_idx!r}; expected one of {sorted(speaker_map)}"
        ) from None

    synthesizer = _load_synthesizer()
    wavs = synthesizer.tts(text, speaker_id)

    # Write the audio to a temp file that outlives this function so the UI
    # can serve it by path.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        synthesizer.save_wav(wavs, fp)
        return fp.name
# Usage instructions shown on the page (Catalan, user-facing).
description = """
1️⃣ Introdueix el text a sintetitzar.
2️⃣ Selecciona una veu en el desplegable.
3️⃣ Gaudeix!
"""
article = ""

# Input widgets: free text plus a dropdown listing the speaker aliases.
_text_input = gr.inputs.Textbox(
    label="Text",
    default="Introdueix el text a sintetitzar.",
)
_speaker_input = gr.inputs.Dropdown(
    label="Selecciona un parlant",
    choices=SPEAKERS,
    default=None,
)

# Output widget: the audio is handed over as a file path, which is what
# `tts` returns.
_audio_output = gr.outputs.Audio(label="Output", type="filepath")

# Flagging is switched off, though a list of flag options is still supplied.
iface = gr.Interface(
    fn=tts,
    inputs=[_text_input, _speaker_input],
    outputs=_audio_output,
    title="🗣️ TTS Català Multi Parlant - VITS 🗣️",
    theme="grass",
    description=description,
    article=article,
    allow_flagging=False,
    flagging_options=['error', 'bad-quality', 'wrong-pronounciation'],
    layout="vertical",
    live=False,
)

# Start the web app without requesting a public share link.
iface.launch(share=False)