ccoreilly's picture
fix import
7ae4987
raw
history blame
2.2 kB
from engine import Piper
import tempfile
from typing import Optional
from TTS.config import load_config
import gradio as gr
import numpy as np
import os
import json
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer
# Maximum number of characters of input text that tts() will synthesize;
# longer input is truncated to this length.
MAX_TXT_LEN = 100

# Initial speaker identifiers. This name is rebound later in the file to the
# speaker list reported by the loaded model.
SPEAKERS = [
    "f_cen_05",
    "f_cen_81",
    "f_occ_31",
    "f_occ_de",
    "f_sep_31",
    "m_cen_08",
    "m_occ_44",
    "m_val_89",
]
def carrega_bsc():
    """Build the ``Synthesizer`` for the model stored under ``models/bsc``.

    All paths are resolved against the current working directory. No vocoder
    checkpoint or vocoder config is supplied.

    Returns:
        The ``Synthesizer`` instance constructed from the checkpoint, config
        and speakers file.
    """
    cwd = os.getcwd()
    checkpoint = f"{cwd}/models/bsc/best_model.pth"
    config = f"{cwd}/models/bsc/config.json"
    speakers_file = f"{cwd}/models/bsc/speakers.pth"

    # Arguments are positional, so their order must be kept as-is.
    return Synthesizer(
        checkpoint,
        config,
        speakers_file,
        None,  # NOTE(review): presumably the languages file -- confirm against Synthesizer
        None,  # vocoder checkpoint: not used
        None,  # vocoder config: not used
    )
# Load the synthesizer once at import time; tts() reuses this module-level
# instance for every request.
model_bsc = carrega_bsc()
# Replace the hard-coded speaker list with the one exposed by the loaded model.
SPEAKERS = model_bsc.speakers
def tts(text, speaker_idx):
    """Synthesize ``text`` with the given speaker and return a WAV file path.

    Args:
        text: Text to synthesize. Input longer than ``MAX_TXT_LEN`` characters
            is truncated to that length and a notice is printed.
        speaker_idx: Speaker identifier, passed unchanged to
            ``model_bsc.tts``.

    Returns:
        Path of a temporary ``.wav`` file containing the synthesized audio.
        The file is created with ``delete=False``, so it is not removed
        automatically when it is closed.
    """
    if len(text) > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")
    print(text)

    # The speaker identifier is forwarded as-is. An earlier revision loaded
    # speaker_map.json here on every call without ever using the result; that
    # dead read (and its failure mode when the file is missing or malformed)
    # has been removed.
    wavs = model_bsc.tts(text, speaker_idx)

    # delete=False keeps the file on disk after the handle is closed, so the
    # returned path is still valid for the caller.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        model_bsc.save_wav(wavs, fp)
        return fp.name
# Usage instructions shown in the interface (user-facing text, in Catalan).
description = """
1️⃣ Introdueix el text a sintetitzar.
2️⃣ Selecciona una veu en el desplegable.
3️⃣ Gaudeix!
"""
article = ""

# Input components, listed in the same order as tts()'s parameters
# (text, speaker_idx).
_text_box = gr.inputs.Textbox(
    label="Text",
    default="L'Èlia i l'Alí a l'aula. L'oli i l'ou. Lulú olorava la lila.",
)
_speaker_dropdown = gr.inputs.Dropdown(
    label="Selecciona un parlant",
    choices=SPEAKERS,
    default=None,
)

# tts() returns the path of a WAV file, hence type="filepath".
_audio_output = gr.outputs.Audio(label="Output", type="filepath")

iface = gr.Interface(
    fn=tts,
    inputs=[_text_box, _speaker_dropdown],
    outputs=_audio_output,
    title="🗣️ TTS Català Multi Parlant - VITS 🗣️",
    description=description,
    article=article,
    allow_flagging="never",
    layout="vertical",
    live=False,
)

# Start the web server without requesting a public share link.
iface.launch(share=False)