# tts / app.py
# inspire4dev's picture
# Update app.py
# 5d2d68e verified
import tempfile
import edge_tts
import gradio as gr
from transformers import pipeline
import pyarabic.araby as araby
# Speaker display names grouped by UI language and then by voice locale.
# Every voice identifier follows the pattern "<locale>-<Name>Neural", so only
# the locale and the speaker names need to be spelled out here. The order of
# the groups and names is significant: it is the order of the resulting
# dictionaries, and the first speaker of a language is the one preselected by
# get_speakers.
_SPEAKERS_BY_LOCALE = {
    "English": (
        ("en-US", ("Jenny", "Guy", "Ana", "Aria", "Christopher", "Eric", "Michelle", "Roger")),
        ("en-AU", ("Natasha", "William")),
        ("en-CA", ("Clara", "Liam")),
        ("en-GB", ("Libby", "Maisie", "Ryan", "Sonia", "Thomas")),
        ("en-HK", ("Sam", "Yan")),
        ("en-IE", ("Connor", "Emily")),
        ("en-IN", ("Neerja", "Prabhat")),
        ("en-KE", ("Asilia", "Chilemba")),
        ("en-NG", ("Abeo", "Ezinne")),
        ("en-NZ", ("Mitchell",)),
        ("en-PH", ("James", "Rosa")),
        ("en-SG", ("Luna", "Wayne")),
        ("en-TZ", ("Elimu", "Imani")),
        ("en-ZA", ("Leah", "Luke")),
    ),
    "Vietnamese": (
        ("vi-VN", ("HoaiMy", "NamMinh")),
    ),
}

# Maps UI language name -> {speaker display name -> voice identifier passed
# to edge_tts.Communicate}.
language_dict = {}
for _language_name, _locale_groups in _SPEAKERS_BY_LOCALE.items():
    _voices = {}
    for _locale, _speaker_names in _locale_groups:
        for _speaker_name in _speaker_names:
            _voices[_speaker_name] = f"{_locale}-{_speaker_name}Neural"
    language_dict[_language_name] = _voices
# Text-to-text model used by text_to_speech_edge to regenerate Arabic
# diacritics (tashkeel). Created once, when this module is imported.
pipe = pipeline(
    "text2text-generation",
    model="mush42/fine-tashkeel",
)
async def text_to_speech_edge(text, language_code, speaker, tashkeel_checkbox=False, speed=0, pitch=0):
    """Synthesize ``text`` to an MP3 file with Edge TTS.

    Args:
        text: Text to read aloud.
        language_code: Key into ``language_dict`` (e.g. ``"English"``).
        speaker: Key into ``language_dict[language_code]``.
        tashkeel_checkbox: When true and ``language_code`` is ``"Arabic"``,
            existing diacritics are stripped and regenerated with ``pipe``
            before synthesis.
        speed: Speaking-rate offset, sent to Edge TTS as a signed percentage
            such as ``+10%``. Non-integer values are rounded.
        pitch: Pitch offset, sent to Edge TTS as a signed value such as
            ``-5Hz``. Non-integer values are rounded.

    Returns:
        A ``(text, path)`` tuple: the text that was actually synthesized and
        the path of the generated ``.mp3`` file. The file is created with
        ``delete=False`` and is not removed by this function.

    Raises:
        gr.Error: If ``text`` is blank, or if ``language_code``/``speaker``
            do not identify a voice in ``language_dict``.
    """
    # Fail early with a readable message instead of sending blank input to
    # the TTS service. (Message: "Please enter the text to convert.")
    if not text or not text.strip():
        raise gr.Error("Vui lòng nhập văn bản cần chuyển thành giọng đọc.")

    # Both dropdowns start unselected, so a missing/unknown key is an expected
    # user mistake rather than a programming error.
    # (Message: "Please choose a language and a voice.")
    try:
        voice = language_dict[language_code][speaker]
    except (KeyError, TypeError):
        raise gr.Error("Vui lòng chọn ngôn ngữ và giọng đọc.") from None

    # Remove diacritics from Arabic text, then add tashkeel.
    if language_code == "Arabic" and tashkeel_checkbox:
        text = pipe(araby.strip_diacritics(text))[0]["generated_text"].strip()

    # The "+d" format only accepts integers; coerce first so a float coming
    # from the UI (e.g. 5.0) does not raise ValueError.
    rate_str = f"{int(round(speed)):+d}%"
    pitch_str = f"{int(round(pitch)):+d}Hz"

    communicate = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str)

    # Only the unique path is needed; close our own handle before edge_tts
    # writes to that path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
    await communicate.save(tmp_path)

    return text, tmp_path
def get_speakers(language):
    """Build the UI updates that follow a change of the language dropdown.

    Args:
        language: The selected key of ``language_dict``; may be ``None`` or
            an unknown value when the selection has been cleared.

    Returns:
        A ``(dropdown, checkbox)`` tuple. The dropdown lists the speakers of
        ``language`` with the first one preselected; the checkbox is visible
        only when ``language`` is ``"Arabic"``. When ``language`` has no
        speakers, the dropdown is emptied and disabled and the checkbox is
        hidden, instead of raising ``KeyError``/``IndexError``.
    """
    print(language)
    speakers = list(language_dict.get(language) or ())

    # Nothing to offer: reset both controls to their initial, inert state.
    if not speakers:
        return (
            gr.Dropdown(choices=[], value=None, interactive=False),
            gr.Checkbox(visible=False, interactive=False),
        )

    return (
        gr.Dropdown(choices=speakers, value=speakers[0], interactive=True),
        gr.Checkbox(visible=language == "Arabic", interactive=True),
    )
# Nothing is preselected: the speaker dropdown stays empty and disabled until
# a language is chosen and get_speakers fills it in.
default_language = None
default_speaker = None

with gr.Blocks(title="Inspire4DEV - Đọc Văn Bản") as demo:
    # Comma-separated list of the supported language names, shown twice below.
    supported_languages = ", ".join(language_dict.keys())

    # Page header. The closing tags now match their opening tags (the heading
    # and paragraph were previously both closed with "</h3>").
    gr.HTML("<center><h1>CHUYỂN VĂN BẢN THÀNH GIỌNG ĐỌC</h1></center>")
    gr.HTML(f"<h2 style='color:Tomato;'> {len(language_dict)} ngôn ngữ được hỗ trợ {supported_languages}</h2>")
    gr.HTML(f"<p> {supported_languages} </p>")

    with gr.Row():
        # Left column: text input and synthesis controls.
        with gr.Column():
            input_text = gr.Textbox(lines=5, label="Văn bản", placeholder="Hãy nhập văn bản cần chuyển thành giọng đọc")
            language = gr.Dropdown(
                choices=list(language_dict.keys()), value=default_language, label="Lựa chọn ngôn ngữ", interactive=True
            )
            speaker = gr.Dropdown(choices=[], value=default_speaker, label="Giọng Đọc", interactive=False)
            # Hidden by default; get_speakers makes it visible for "Arabic".
            tashkeel_checkbox = gr.Checkbox(label="Tashkeel", value=False, visible=False, interactive=False)
            speedrate = gr.Slider(minimum=-50, maximum=50, value=0, label="Tốc độ đọc (%)", step=1)
            pitchadj = gr.Slider(minimum=-20, maximum=20, value=0, label="Tông giọng (Hz)", step=1)
            run_btn = gr.Button(value="TẠO GIỌNG ĐỌC", variant="primary")
        # Right column: the synthesized text and the resulting audio file.
        with gr.Column():
            output_text = gr.Textbox(label="Văn bản sau khi chuyển đổi")
            output_audio = gr.Audio(type="filepath", label="Âm thanh")

    # Changing the language refreshes the speaker list and the checkbox.
    language.change(get_speakers, inputs=[language], outputs=[speaker, tashkeel_checkbox])
    # The input order must match the parameter order of text_to_speech_edge.
    run_btn.click(
        text_to_speech_edge,
        inputs=[input_text, language, speaker, tashkeel_checkbox, speedrate, pitchadj],
        outputs=[output_text, output_audio],
    )

if __name__ == "__main__":
    demo.queue().launch(share=False)