TunArTTS / app.py
imenLa's picture
Update app.py
60a4fae verified
raw
history blame contribute delete
2.77 kB
import numpy as np
import gradio as gr
from scipy.io import wavfile
from espnet2.bin.tts_inference import Text2Speech
from arabic_pronounce import phonetise
import soundfile as sf
# Page heading; passed as `title=` to the gr.Interface built further down.
title = " Tunisian Text To Speech"
# Introductory text; passed as `description=` to the gr.Interface built
# further down. It contains Markdown-style links and a list of authors.
# NOTE(review): the user-facing text has typos ("dicritized",
# "to synthesis", "project project"), the `[here](paper_link)` target looks
# like an unfilled placeholder, and nothing follows "please cite :" --
# confirm the intended wording/links before changing the string.
description = """
This is a demo for our Tunisian TTS system. You can write your dicritized tunisian text to synthesis the corresponding speech.
This project project was developed with the purpose of bridging the gap between high-resource and low-resource languages.
If you need help, feel free to drop an email here :
fethi.bougares@elyadata.com
rami.kammoun@algobrain.ai
imen.laouirine@elyadata.com
Authors :
* [Imen Laouirine](https://www.linkedin.com/in/imen-laouirine-9a557b209)
* [Rami Kammoun](https://www.linkedin.com/in/rami-kammoun/)
* [Fethi Bougares](https://www.linkedin.com/in/fethi-bougares/)
More implementation details could be found in[ ![GitHub](https://img.shields.io/badge/github-%23121011.svg?style=for-the-badge&logo=github&logoColor=white) ](https://github.com/elyadata/TunArTTS/tree/develop)
More in-depth details and insights are available in a released preprint. Please find the paper [here](paper_link).
If you use or refer to this model, please cite :
"""
# Sample inputs; passed as `examples=` to the gr.Interface built further down.
# Each inner list holds one value, matching the interface's single text input.
# NOTE(review): these literals look like UTF-8 Arabic text that was decoded
# with a single-byte codec (mojibake) -- confirm the file's encoding and
# restore the original strings from the upstream source if so.
examples = [
["ูŠู’ูƒูู†ู‘ู’ู„ููˆ ู…ูŽุดูŽุงุนูุฑู’ ู‚ู’ูˆููŠู‘ูŽุฉ ูŠู’ูƒูู†ู‘ู’ู„ููˆ ู…ูŽุดูŽุงุนูุฑู’ ู‚ู’ูˆููŠู‘ูŽุฉ"],
["ุณูŽุงุฑูŽู‚ู’ ูˆู’ูููŠ ูŠูุฏู‘ููˆ ุดูŽู…ู’ุนูŽุฉ"],
["ุตูŽุงู…ู’ ู†ู’ู‡ูŽุงุฑู’ ู…ูู†ู’ ุฑูู…ู’ุถูŽุงู†ู’ุŒ ู‚ูŽุงู„ู’ ุงู„ุนููŠุฏู’ ุขุดู’ ู…ูŽุงุฒูŽุงู„ููˆุŸ"],
["ุถูŽุญู’ูƒููˆู„ููˆ ุชู’ู…ูŽุฏู’ ุนู’ู„ูŽู‰ ุทููˆู„ููˆ"],
["ุนูŽุงุฑููƒู’ ูˆู’ุฎูŽู„ู‘ููŠ ู„ูู„ู’ุตูู„ู’ุญู’ ู…ู’ูƒูŽุงู†ู’"]
]
def text_to_phoneme(tun_text):
    """Convert space-separated text into a ``sil``-delimited phoneme string.

    ``tun_text`` is split on single space characters and every resulting
    chunk is passed to ``phonetise``; only the first element of each result
    is kept. The pieces are joined with single spaces, surrounding
    whitespace is stripped, and the result is wrapped as ``"sil ... sil"``.

    Args:
        tun_text: Input text whose words are separated by spaces.

    Returns:
        The phoneme string with a leading and trailing ``sil`` token.
    """
    pieces = [phonetise(word)[0] for word in tun_text.split(" ")]
    phonemes = " ".join(pieces).strip()
    return f"sil {phonemes} sil"
# Lazily-created Text2Speech instance, shared by every call to generate_tts so
# the checkpoints are read from disk once instead of on every request.
_TTS_MODEL = None


def _get_tts_model():
    """Return the shared Text2Speech model, loading it on first use."""
    global _TTS_MODEL
    if _TTS_MODEL is None:
        _TTS_MODEL = Text2Speech.from_pretrained(
            model_file="exp/tts_train_conformer_fastspeech2_raw_phn_none/train.loss.ave_5best.pth",
            vocoder_file="train_tun_parallel_wavegan.v3/checkpoint-560000steps.pkl",
        )
    return _TTS_MODEL


def generate_tts(input_text):
    """Synthesize speech for ``input_text`` and write it to ``output.wav``.

    The text is converted to phonemes with ``text_to_phoneme``, fed to the
    cached Text2Speech model, and the resulting waveform is written to
    ``output.wav`` in the current working directory at 22050 Hz.

    Args:
        input_text: Text to synthesize.

    Returns:
        None. The audio is delivered through the ``output.wav`` side effect.
    """
    phonemized_text = text_to_phoneme(input_text)
    tts = _get_tts_model()
    # NOTE(review): text_to_phoneme already wraps its result in "sil ... sil",
    # so the model input here starts and ends with two "sil" tokens. Kept
    # as-is to preserve the synthesized output -- confirm this is intended.
    wav = tts(f"sil {phonemized_text} sil")["wav"]
    # Assumes the model returns a CPU tensor -- TODO confirm if a GPU device
    # is ever configured.
    audio_data = wav.numpy()
    sf.write('output.wav', audio_data, samplerate=22050)
def generate_audio(inputs):
    """Synthesize ``inputs`` and return the audio read back from disk.

    Calls ``generate_tts`` (which writes ``output.wav``) and then loads that
    file with ``scipy.io.wavfile.read``.

    Args:
        inputs: Text to synthesize.

    Returns:
        A ``(sample_rate, samples)`` tuple as read from ``output.wav``.
    """
    generate_tts(inputs)
    sample_rate, samples = wavfile.read("output.wav")
    return sample_rate, samples
# Gradio interface: one text box in, synthesized audio out, with the page
# title, description and clickable examples defined above.
demo = gr.Interface(
    fn=generate_audio,
    inputs=gr.Text(label="Input Text"),
    outputs="audio",
    title=title,
    description=description,
    examples=examples,
)

if __name__ == "__main__":
    # Launch the app only when this file is executed as a script.
    demo.launch()