|
import numpy as np |
|
import gradio as gr |
|
|
|
from scipy.io import wavfile |
|
from espnet2.bin.tts_inference import Text2Speech |
|
from arabic_pronounce import phonetise |
|
import soundfile as sf |
|
|
|
|
|
# Heading shown at the top of the Gradio page.
title = " Tunisian Text To Speech"

# Markdown body rendered under the title: usage hint, contacts, authors, links.
description = """
This is a demo for our Tunisian TTS system. You can write your diacritized Tunisian text to synthesize the corresponding speech.
This project was developed with the purpose of bridging the gap between high-resource and low-resource languages.

If you need help, feel free to drop an email here :
fethi.bougares@elyadata.com
rami.kammoun@algobrain.ai
imen.laouirine@elyadata.com

Authors :
* [Imen Laouirine](https://www.linkedin.com/in/imen-laouirine-9a557b209)
* [Rami Kammoun](https://www.linkedin.com/in/rami-kammoun/)
* [Fethi Bougares](https://www.linkedin.com/in/fethi-bougares/)

More implementation details can be found in the [GitHub repository](https://github.com/elyadata/TunArTTS/tree/develop).
More in-depth details and insights are available in a released preprint. Please find the paper [here](paper_link).
If you use or refer to this model, please cite :

"""
|
|
|
examples = [ |
|
["ูููููููููู ู
ูุดูุงุนูุฑู ูููููููุฉ ูููููููููู ู
ูุดูุงุนูุฑู ูููููููุฉ"], |
|
["ุณูุงุฑููู ููููู ููุฏููู ุดูู
ูุนูุฉ"], |
|
["ุตูุงู
ู ููููุงุฑู ู
ููู ุฑูู
ูุถูุงููุ ููุงูู ุงูุนููุฏู ุขุดู ู
ูุงุฒูุงูููุ"], |
|
["ุถูุญููููููู ุชูู
ูุฏู ุนูููู ุทููููู"], |
|
["ุนูุงุฑููู ููุฎููููู ููููุตูููุญู ู
ูููุงูู"] |
|
] |
|
|
|
|
|
def text_to_phoneme(tun_text):
    """Convert whitespace-separated text into a ``sil``-padded phoneme string.

    Each word is passed to ``phonetise`` and the first element of its result
    is kept. The per-word results are joined with single spaces, stripped,
    and wrapped in ``sil`` markers on both sides.

    Args:
        tun_text: Input text whose words are separated by whitespace.

    Returns:
        A string of the form ``"sil <phonemes> sil"``, or ``"sil sil"`` when
        the input contains no words.
    """
    # split() with no argument discards the empty tokens that split(" ")
    # produces for leading, trailing, or repeated spaces, so phonetise is
    # never called with an empty string.
    body = " ".join(phonetise(word)[0] for word in tun_text.split()).strip()
    return f"sil {body} sil" if body else "sil sil"
|
|
|
# Shared Text2Speech instance, built on first use so the checkpoints are not
# re-read from disk on every synthesis request.
_TTS_MODEL = None


def _load_tts():
    """Return the shared ``Text2Speech`` model, creating it on first call."""
    global _TTS_MODEL
    if _TTS_MODEL is None:
        _TTS_MODEL = Text2Speech.from_pretrained(
            model_file="exp/tts_train_conformer_fastspeech2_raw_phn_none/train.loss.ave_5best.pth",
            vocoder_file="train_tun_parallel_wavegan.v3/checkpoint-560000steps.pkl",
        )
    return _TTS_MODEL


def generate_tts(input_text):
    """Synthesize speech for ``input_text`` and write it to ``output.wav``.

    The text is phonemized with ``text_to_phoneme``, fed to the cached
    ``Text2Speech`` model, and the resulting waveform is written to
    ``output.wav`` in the current working directory at 22050 Hz.

    Args:
        input_text: Text to synthesize.

    Returns:
        None. The audio is delivered through the ``output.wav`` file.
    """
    phonemized_text = text_to_phoneme(input_text)
    tts = _load_tts()
    # NOTE(review): text_to_phoneme already wraps its result in "sil", so the
    # model receives a doubled "sil" token at each end. Kept as-is to avoid
    # changing the synthesized audio; confirm whether this is intended.
    wav = tts(f"sil {phonemized_text} sil")["wav"]
    # presumably a torch tensor; converted to a NumPy array for soundfile
    audio_data = wav.numpy()
    sf.write('output.wav', audio_data, samplerate=22050)
|
|
|
def generate_audio(inputs):
    """Synthesize ``inputs`` and return the audio as ``(sample_rate, samples)``.

    ``generate_tts`` writes the synthesized speech to ``output.wav``; that
    file is then read back with ``scipy.io.wavfile.read`` and its sample rate
    and sample array are returned as a tuple.
    """
    generate_tts(inputs)
    sample_rate, samples = wavfile.read("output.wav")
    return sample_rate, samples
|
|
|
|
|
# Gradio UI: a single text box in, synthesized audio out.
demo = gr.Interface(
    fn=generate_audio,
    inputs=gr.Text(label="Input Text"),
    outputs="audio",
    title=title,
    description=description,
    examples=examples,
)


if __name__ == "__main__":
    # Launch the web app only when this file is executed as a script.
    demo.launch()