Spaces:

softcatala
/

comparativa-tts-catala

Running

App Files Files Community

comparativa-tts-catala / app.py

ccoreilly

Manté puntuació

3729983 over 1 year ago

raw

history blame

4.41 kB

	from engine import Piper
	import tempfile
	from typing import Optional
	from TTS.config import load_config
	import gradio as gr
	import numpy as np
	import os
	import json
	from TTS.utils.manage import ModelManager
	from TTS.utils.synthesizer import Synthesizer
	from espeak_phonemizer import Phonemizer

	# Maximum number of input characters synthesized per request; tts() cuts
	# longer input down to this length.
	MAX_TXT_LEN = 100

	# Initial speaker identifiers. This name is rebound further down to the
	# speaker names reported by the BSC model once that model has been loaded.
	SPEAKERS = [
	    'f_cen_05',
	    'f_cen_81',
	    'f_occ_31',
	    'f_occ_de',
	    'f_sep_31',
	    'm_cen_08',
	    'm_occ_44',
	    'm_val_89',
	]

	# Phonemizer instance created for the "ca" voice; tts() uses it to produce
	# the phoneme transcription shown next to the audio outputs.
	fonemitzador = Phonemizer("ca")

	def carrega_bsc():
	    """Build the ``Synthesizer`` for the BSC model stored under ``models/bsc``.

	    All paths are resolved against the current working directory, so the
	    process must be started from the directory that contains ``models/``.

	    Returns:
	        The ``Synthesizer`` created from the BSC checkpoint, its config and
	        its speakers file, with no vocoder checkpoint or vocoder config.
	    """
	    bsc_dir = f"{os.getcwd()}/models/bsc"

	    # Positional order: model checkpoint, model config, speakers file, a
	    # fourth slot left as None, then vocoder checkpoint and vocoder config
	    # (both None for this model).
	    return Synthesizer(
	        f"{bsc_dir}/best_model.pth",
	        f"{bsc_dir}/config.json",
	        f"{bsc_dir}/speakers.pth",
	        None,
	        None,
	        None,
	    )

	def carrega_collectivat():
	    """Build the ``Synthesizer`` for the model stored under ``models/collectivat``.

	    All paths are resolved against the current working directory, so the
	    process must be started from the directory that contains ``models/``.

	    Returns:
	        The ``Synthesizer`` created from the fast-speech checkpoint and
	        config together with the HiFi-GAN vocoder checkpoint and config.
	    """
	    coll_dir = f"{os.getcwd()}/models/collectivat"

	    # Positional order: model checkpoint, model config, two slots left as
	    # None (no speakers file is given for this model), then the vocoder
	    # checkpoint and the vocoder config.
	    return Synthesizer(
	        f"{coll_dir}/fast-speech_best_model.pth",
	        f"{coll_dir}/fast-speech_config.json",
	        None,
	        None,
	        f"{coll_dir}/ljspeech--hifigan_v2_model_file.pth",
	        f"{coll_dir}/ljspeech--hifigan_v2_config.json",
	    )

	def carrega_piper():
	    """Create the ``Piper`` engine for the ONNX voice under ``models/piper``.

	    The model path is resolved against the current working directory.

	    Returns:
	        A ``Piper`` instance constructed with the path to the ONNX file.
	    """
	    onnx_path = f"{os.getcwd()}/models/piper/ca-upc_ona-x-low.onnx"
	    return Piper(onnx_path)


	# Load every model once at import time; tts() reuses these module-level
	# instances on each call instead of loading them again.
	model_bsc = carrega_bsc()
	# Rebind SPEAKERS (given a hard-coded list earlier in the file) to the
	# speaker names exposed by the loaded BSC model's speaker manager.
	SPEAKERS = model_bsc.tts_model.speaker_manager.speaker_names

	model_collectivat = carrega_collectivat()

	model_piper = carrega_piper()

	def _write_temp_wav(write_audio):
	    """Create a persistent temporary ``.wav`` file and return its path.

	    Args:
	        write_audio: Callable that receives the open temporary file object
	            and writes the audio data into it.

	    Returns:
	        The filesystem path of the temporary file. The file is created with
	        ``delete=False``, so it is not removed when the handle is closed.
	    """
	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
	        write_audio(handle)
	        return handle.name


	def tts(text, speaker_idx):
	    """Synthesize ``text`` with the three loaded models and phonemize it.

	    Args:
	        text: Input text. Anything beyond ``MAX_TXT_LEN`` characters is
	            dropped before synthesis.
	        speaker_idx: Speaker passed to the BSC model; the other two models
	            are called without a speaker.

	    Returns:
	        A 4-tuple ``(fonemes, fp_bsc, fp_coll, fp_piper)``: the phonemizer
	        output for the (possibly truncated) text, followed by the paths of
	        the temporary ``.wav`` files written for the BSC, Collectivat and
	        Piper models, in that order.
	    """
	    too_long = len(text) > MAX_TXT_LEN
	    if too_long:
	        text = text[:MAX_TXT_LEN]
	        print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")
	    print(text)

	    # Run all three engines on the same text before anything is written out.
	    wav_bsc = model_bsc.tts(text, speaker_idx)
	    wav_coll = model_collectivat.tts(text)
	    wav_piper = model_piper.synthesize(text)

	    # The two Synthesizer outputs are written through save_wav; the Piper
	    # output is written to the file as-is.
	    fp_bsc = _write_temp_wav(lambda out: model_bsc.save_wav(wav_bsc, out))
	    fp_coll = _write_temp_wav(lambda out: model_collectivat.save_wav(wav_coll, out))
	    fp_piper = _write_temp_wav(lambda out: out.write(wav_piper))

	    fonemes = fonemitzador.phonemize(text)

	    return fonemes, fp_bsc, fp_coll, fp_piper


	# Introductory text handed to gr.Interface as `description`: it lists the
	# three compared models with their project links and credits the espeak-ng
	# build used for phonemization.
	# NOTE(review): the Catalan copy contains a few spelling slips ("dós",
	# "una altre", "esta sel·leccionada"); it is left untouched here because it
	# is user-facing runtime text.
	description="""
	Amb aquesta aplicació podeu sintetitzar text a veu amb els últims models lliures pel català.

	1. Model multi-parlant VITS entrenat pel BSC (Projecte Aina)
	https://huggingface.co/projecte-aina/tts-ca-coqui-vits-multispeaker

	2. Model Fastspeech entrenat per Col·lectivat
	https://github.com/CollectivaT-dev/TTS-API

	3. Model VITS entrenat per Piper/Home Assistant
	https://github.com/rhasspy/piper

	Els dós últims models han estat entrenats amb la veu d'Ona de FestCAT, que va servir com a base per a les veus catalanes de Festival

	El primer model conté moltes veus de qualitat variable. Podeu seleccionar-ne una altre al desplegable. La veu d'Ona esta sel·leccionada per defecte per la comparativa.

	Aquesta aplicació fa servir l'últim estat de l'espeak millorat per Carme Armentano del BSC
	https://github.com/projecte-aina/espeak-ng

	NOTA: El model de col·lectivat treballa amb grafemes pel que no fa servir espeak com a fonemitzador.
	"""
	# Text handed to gr.Interface as `article`; currently empty.
	article= ""

	# Comparison UI: one text box and a speaker dropdown feed tts(), whose four
	# return values map, in order, onto the four output components below.
	iface = gr.Interface(
	    fn=tts,
	    inputs=[
	        gr.Textbox(
	            label="Text",
	            value="L'Èlia i l'Alí a l'aula. L'oli i l'ou. Lulú olorava la lila.",
	        ),
	        # Choices come from SPEAKERS as it stands at this point in the module.
	        gr.Dropdown(label="Selecciona un parlant", choices=SPEAKERS, value="ona"),
	    ],
	    outputs=[
	        gr.Markdown(label="Fonemes"),
	        gr.Audio(label="BSC VITS", type="filepath"),
	        gr.Audio(label="Collectivat Fastspeech", type="filepath"),
	        gr.Audio(label="Piper VITS", type="filepath"),
	    ],
	    title="Comparativa de síntesi lliure en català️",
	    description=description,
	    article=article,
	    allow_flagging="never",
	    # No `layout` argument: it is a deprecated, ignored option in Gradio 3
	    # and is not an accepted Interface parameter in Gradio 4, so passing it
	    # only produces a warning or an error.
	    live=False,
	)
	# Listen on all network interfaces on port 7860.
	iface.launch(server_name="0.0.0.0", server_port=7860)