File size: 2,768 Bytes
455c922 23e6066 57d76c0 4328747 23e6066 4328747 57d76c0 086c039 60a4fae 086c039 21c7503 299936d 21c7503 9a43531 086c039 ab9af27 086c039 60af963 086c039 00c4b3b 086c039 f139b27 cddc3e0 16fcae1 cddc3e0 f139b27 e6757ef 23e6066 f572cd2 23e6066 4328747 455c922 54c9038 4328747 7ad56ab 42acf0b 52dc6f4 42acf0b 455c922 908a29b fdf341a 21e7589 a3419d8 908a29b cddc3e0 fdf341a e6f3aba fdf341a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
from functools import lru_cache

import gradio as gr
import numpy as np
import soundfile as sf
from arabic_pronounce import phonetise
from espnet2.bin.tts_inference import Text2Speech
from scipy.io import wavfile
# User-facing strings rendered by Gradio (description is Markdown).
# Fixes vs. original: stray leading space in title, "dicritized" -> "diacritized",
# "to synthesis" -> "to synthesize", duplicated "project project", and a broken
# Markdown link with empty link text.
title = "Tunisian Text To Speech"
description = """
This is a demo for our Tunisian TTS system. You can write your diacritized Tunisian text to synthesize the corresponding speech.
This project was developed with the purpose of bridging the gap between high-resource and low-resource languages.
If you need help, feel free to drop an email here :
fethi.bougares@elyadata.com
rami.kammoun@algobrain.ai
imen.laouirine@elyadata.com
Authors :
* [Imen Laouirine](https://www.linkedin.com/in/imen-laouirine-9a557b209)
* [Rami Kammoun](https://www.linkedin.com/in/rami-kammoun/)
* [Fethi Bougares](https://www.linkedin.com/in/fethi-bougares/)
More implementation details could be found [here](https://github.com/elyadata/TunArTTS/tree/develop)
More in-depth details and insights are available in a released preprint. Please find the paper [here](paper_link).
If you use or refer to this model, please cite :
"""
# Example prompts shown in the Gradio UI (diacritized Tunisian Arabic).
# NOTE(review): each example string was rejoined onto a single line — the
# original had string literals split mid-word across physical lines (an
# extraction/paste artifact that is not valid Python). Character content
# is preserved as-is; verify rendering against the original corpus.
examples = [
    ["ูููููููููู ููุดูุงุนูุฑู ูููููููุฉ ูููููููููู ููุดูุงุนูุฑู ูููููููุฉ"],
    ["ุณูุงุฑููู ููููู ููุฏููู ุดูููุนูุฉ"],
    ["ุตูุงูู ููููุงุฑู ูููู ุฑูููุถูุงููุ ููุงูู ุงูุนููุฏู ุขุดู ููุงุฒูุงูููุ"],
    ["ุถูุญููููููู ุชูููุฏู ุนูููู ุทููููู"],
    ["ุนูุงุฑููู ููุฎููููู ููููุตูููุญู ููููุงูู"],
]
def text_to_phoneme(tun_text):
    """Convert diacritized Tunisian text to a space-separated phoneme string.

    Each space-delimited word is phonetised independently and the first
    pronunciation variant returned by `phonetise` is used. The result is
    wrapped in "sil" (silence) markers, e.g. "sil p h o n e m e s sil".

    Replaces the original's quadratic `+=` string building over
    `range(len(...))` with a single `str.join`; output is identical
    (split(" ") is kept deliberately to preserve behavior on runs of spaces).
    """
    phonemes = " ".join(phonetise(word)[0] for word in tun_text.split(" "))
    return f"sil {phonemes} sil"
@lru_cache(maxsize=1)
def _load_tts():
    """Load the acoustic model + Parallel WaveGAN vocoder exactly once.

    Model loading is expensive (reads two checkpoints from disk); the
    original reloaded both on every synthesis request.
    """
    return Text2Speech.from_pretrained(
        model_file="exp/tts_train_conformer_fastspeech2_raw_phn_none/train.loss.ave_5best.pth",
        vocoder_file="train_tun_parallel_wavegan.v3/checkpoint-560000steps.pkl",
    )


def generate_tts(input_text):
    """Synthesize `input_text` and write the waveform to 'output.wav'.

    Bug fix: `text_to_phoneme` already wraps its result in "sil ... sil",
    but the original wrapped it a second time (f"sil {text} sil"), so the
    model received doubled silence markers at both ends. The phonemized
    text is now passed through unmodified.
    """
    phonemized_text = text_to_phoneme(input_text)
    tts = _load_tts()
    wav = tts(phonemized_text)["wav"]
    # 22050 Hz — presumably the vocoder's training sample rate; TODO confirm
    sf.write('output.wav', wav.numpy(), samplerate=22050)
def generate_audio(inputs):
    """Gradio callback: synthesize `inputs` and return (sample_rate, samples).

    Delegates synthesis to `generate_tts`, which writes 'output.wav',
    then reads that file back in the (rate, ndarray) form Gradio's
    audio output component expects.
    """
    generate_tts(inputs)
    sample_rate, samples = wavfile.read("output.wav")
    return sample_rate, samples
# Build the Gradio UI: a single text box in, synthesized audio out.
demo = gr.Interface(
    fn=generate_audio,
    inputs=gr.Text(label="Input Text"),
    outputs="audio",
    title=title,
    description=description,
    examples=examples,
)

if __name__ == "__main__":
    demo.launch()