Spaces:

Elyadata
/

TunArTTS

Running

App Files Files Community

TunArTTS / app.py

imenLa

Update app.py

60a4fae verified over 1 year ago

raw

history blame contribute delete

2.77 kB

	import numpy as np
	import gradio as gr

	from scipy.io import wavfile
	from espnet2.bin.tts_inference import Text2Speech
	from arabic_pronounce import phonetise
	import soundfile as sf


	# Heading text for the demo; passed to gr.Interface as `title`.
	title = " Tunisian Text To Speech"

	# Long-form blurb passed to gr.Interface as `description`. It contains
	# Markdown-style links (contact e-mails, author profiles, GitHub badge).
	# NOTE(review): the text has typos ("dicritized", "to synthesis",
	# "project project"), the paper link target is the placeholder
	# `paper_link`, and the "please cite" sentence is followed by no citation.
	# The literal is left untouched here because it is user-facing runtime text.
	description = """
	This is a demo for our Tunisian TTS system. You can write your dicritized tunisian text to synthesis the corresponding speech.
	This project project was developed with the purpose of bridging the gap between high-resource and low-resource languages.

	If you need help, feel free to drop an email here :
	fethi.bougares@elyadata.com
	rami.kammoun@algobrain.ai
	imen.laouirine@elyadata.com

	Authors :
	* [Imen Laouirine](https://www.linkedin.com/in/imen-laouirine-9a557b209)
	* [Rami Kammoun](https://www.linkedin.com/in/rami-kammoun/)
	* [Fethi Bougares](https://www.linkedin.com/in/fethi-bougares/)

	More implementation details could be found in[ ![GitHub](https://img.shields.io/badge/github-%23121011.svg?style=for-the-badge&logo=github&logoColor=white) ](https://github.com/elyadata/TunArTTS/tree/develop)
	More in-depth details and insights are available in a released preprint. Please find the paper [here](paper_link).
	If you use or refer to this model, please cite :

	"""

	# Sample inputs passed to gr.Interface as `examples`. Each inner list holds
	# one value: a diacritized Arabic-script sentence for the single text input.
	examples = [
	["يْكِنّْلُو مَشَاعِرْ قْوِيَّة يْكِنّْلُو مَشَاعِرْ قْوِيَّة"],
	["سَارَقْ وْفِي يِدُّو شَمْعَة"],
	["صَامْ نْهَارْ مِنْ رُمْضَانْ، قَالْ العِيدْ آشْ مَازَالُو؟"],
	["ضَحْكُولُو تْمَدْ عْلَى طُولُو"],
	["عَارِكْ وْخَلِّي لِلْصُلْحْ مْكَانْ"]
	]


	def text_to_phoneme(tun_text):
	    """Convert whitespace-separated text into a ``sil``-delimited phoneme string.

	    Each word is passed to ``phonetise`` and the first element of its result
	    is taken as that word's phoneme sequence. The per-word sequences are
	    joined with single spaces and wrapped in ``sil`` markers.

	    Args:
	        tun_text: Input text whose words are separated by whitespace.

	    Returns:
	        A string of the form ``"sil <phonemes> sil"``. When the input holds
	        no words (empty or whitespace-only), ``"sil sil"`` is returned.
	    """
	    # split() with no argument discards the empty tokens that split(" ")
	    # produces for leading, trailing or repeated spaces, so phonetise() is
	    # never handed an empty string and no stray double spaces are emitted.
	    phonemes = " ".join(phonetise(word)[0] for word in tun_text.split()).strip()
	    if not phonemes:
	        return "sil sil"
	    return f"sil {phonemes} sil"

	# Lazily created Text2Speech instance, reused across calls so the acoustic
	# model and vocoder checkpoints are read from disk only once.
	_TTS_MODEL = None


	def _get_tts_model():
	    """Return the shared Text2Speech instance, loading it on first use."""
	    global _TTS_MODEL
	    if _TTS_MODEL is None:
	        _TTS_MODEL = Text2Speech.from_pretrained(
	            model_file="exp/tts_train_conformer_fastspeech2_raw_phn_none/train.loss.ave_5best.pth",
	            vocoder_file="train_tun_parallel_wavegan.v3/checkpoint-560000steps.pkl",
	        )
	    return _TTS_MODEL


	def generate_tts(input_text):
	    """Synthesize speech for ``input_text`` and write it to ``output.wav``.

	    The text is converted to phonemes with ``text_to_phoneme``, fed to the
	    Text2Speech model, and the resulting waveform is written to
	    ``output.wav`` in the current working directory at 22050 Hz.

	    Args:
	        input_text: Text to synthesize.

	    Returns:
	        None. The audio is delivered through the ``output.wav`` side effect.
	    """
	    phonemized_text = text_to_phoneme(input_text)
	    # Reuse the cached model instead of reloading both checkpoints per call.
	    tts = _get_tts_model()
	    # NOTE(review): text_to_phoneme() already wraps its result in "sil"
	    # markers, so the model receives doubled markers here. Kept as-is so the
	    # model input is unchanged; confirm against the training data format.
	    wav = tts(f"sil {phonemized_text} sil")["wav"]
	    audio_data = wav.numpy()
	    # NOTE(review): the fixed file name is shared by every call, so
	    # overlapping requests would overwrite each other's audio.
	    sf.write('output.wav', audio_data, samplerate=22050)

	def generate_audio(inputs):
	    """Gradio callback: synthesize ``inputs`` and return the audio.

	    Calls ``generate_tts`` (which writes ``output.wav``) and then reads that
	    file back with ``scipy.io.wavfile``.

	    Args:
	        inputs: Text entered in the demo's text box.

	    Returns:
	        A ``(sample_rate, samples)`` tuple as read from ``output.wav``.
	    """
	    # Synthesis result is handed over through the file on disk.
	    generate_tts(inputs)

	    sample_rate, samples = wavfile.read("output.wav")
	    return sample_rate, samples


	# Gradio UI: one text box in, synthesized audio out, with the title,
	# description and example sentences defined earlier in this file.
	demo = gr.Interface(
	    fn=generate_audio,
	    inputs=gr.Text(label="Input Text"),
	    outputs="audio",
	    title=title,
	    description=description,
	    examples=examples,
	)

	# Start the web server only when this file is run as a script.
	if __name__ == "__main__":
	    demo.launch()