tts

Runtime error

App Files Files Community

inspire4dev committed on Sep 4, 2024

Commit

5d2d68e

verified ·

1 Parent(s): d81bde6

Update app.py

Browse files

Files changed (1) hide show

app.py +95 -68

app.py CHANGED Viewed

@@ -1,79 +1,106 @@
-import gradio as gr
-import edge_tts
-import asyncio
 import tempfile
-import os
-# Get all available voices
-async def get_voices():
-    voices = await edge_tts.list_voices()
-    return {f"{v['ShortName']} - {v['Locale']} ({v['Gender']})": v['ShortName'] for v in voices}
-# Text-to-speech function
-async def text_to_speech(text, voice, rate, pitch):
-    if not text.strip():
-        return None, gr.Warning("Please enter text to convert.")
-    if not voice:
-        return None, gr.Warning("Please select a voice.")
-    voice_short_name = voice.split(" - ")[0]
-    rate_str = f"{rate:+d}%"
     pitch_str = f"{pitch:+d}Hz"
-    communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)
     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
-        tmp_path = tmp_file.name
-        await communicate.save(tmp_path)
-    return tmp_path, None
-# Gradio interface function
-def tts_interface(text, voice, rate, pitch):
-    audio, warning = asyncio.run(text_to_speech(text, voice, rate, pitch))
-    return audio, warning
-# Create Gradio application
-import gradio as gr
-async def create_demo():
-    voices = await get_voices()
-    description = """
-    Convert text to speech using Microsoft Edge TTS. Adjust speech rate and pitch: 0 is default, positive values increase, negative values decrease.
-    🎥 **Exciting News: Introducing our Text-to-Video Converter!** 🎥
-    Take your content creation to the next level with our cutting-edge Text-to-Video Converter!
-    Transform your words into stunning, professional-quality videos in just a few clicks.
-    ✨ Features:
-    • Convert text to engaging videos with customizable visuals
-    • Choose from 40+ languages and 300+ voices
-    • Perfect for creating audiobooks, storytelling, and language learning materials
-    • Ideal for educators, content creators, and language enthusiasts
-    Ready to revolutionize your content? [Click here to try our Text-to-Video Converter now!](https://text2video.wingetgui.com/)
-    """
-    demo = gr.Interface(
-        fn=tts_interface,
-        inputs=[
-            gr.Textbox(label="Input Text", lines=5),
-            gr.Dropdown(choices=[""] + list(voices.keys()), label="Select Voice", value=""),
-            gr.Slider(minimum=-50, maximum=50, value=0, label="Speech Rate Adjustment (%)", step=1),
-            gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1)
-        ],
-        outputs=[
-            gr.Audio(label="Generated Audio", type="filepath"),
-            gr.Markdown(label="Warning", visible=False)
-        ],
-        title="Edge TTS Text-to-Speech",
-        description=description,
-        article="Experience the power of Edge TTS for text-to-speech conversion, and explore our advanced Text-to-Video Converter for even more creative possibilities!",
-        analytics_enabled=False,
-        allow_flagging=False
-    )
-    return demo
-# Run the application
 if __name__ == "__main__":
-    demo = asyncio.run(create_demo())
-    demo.launch()

 import tempfile
+import edge_tts
+import gradio as gr
+from transformers import pipeline
+import pyarabic.araby as araby
+# Voice catalogue shown in the UI.
+# Outer key: language name listed in the language dropdown.
+# Inner key: speaker display name listed in the speaker dropdown.
+# Inner value: voice identifier handed to edge_tts.Communicate as its voice.
+language_dict = {
+  "English": {
+    "Jenny": "en-US-JennyNeural",
+    "Guy": "en-US-GuyNeural",
+    "Ana": "en-US-AnaNeural",
+    "Aria": "en-US-AriaNeural",
+    "Christopher": "en-US-ChristopherNeural",
+    "Eric": "en-US-EricNeural",
+    "Michelle": "en-US-MichelleNeural",
+    "Roger": "en-US-RogerNeural",
+    "Natasha": "en-AU-NatashaNeural",
+    "William": "en-AU-WilliamNeural",
+    "Clara": "en-CA-ClaraNeural",
+    "Liam": "en-CA-LiamNeural",
+    "Libby": "en-GB-LibbyNeural",
+    "Maisie": "en-GB-MaisieNeural",
+    "Ryan": "en-GB-RyanNeural",
+    "Sonia": "en-GB-SoniaNeural",
+    "Thomas": "en-GB-ThomasNeural",
+    "Sam": "en-HK-SamNeural",
+    "Yan": "en-HK-YanNeural",
+    "Connor": "en-IE-ConnorNeural",
+    "Emily": "en-IE-EmilyNeural",
+    "Neerja": "en-IN-NeerjaNeural",
+    "Prabhat": "en-IN-PrabhatNeural",
+    "Asilia": "en-KE-AsiliaNeural",
+    "Chilemba": "en-KE-ChilembaNeural",
+    "Abeo": "en-NG-AbeoNeural",
+    "Ezinne": "en-NG-EzinneNeural",
+    "Mitchell": "en-NZ-MitchellNeural",
+    "James": "en-PH-JamesNeural",
+    "Rosa": "en-PH-RosaNeural",
+    "Luna": "en-SG-LunaNeural",
+    "Wayne": "en-SG-WayneNeural",
+    "Elimu": "en-TZ-ElimuNeural",
+    "Imani": "en-TZ-ImaniNeural",
+    "Leah": "en-ZA-LeahNeural",
+    "Luke": "en-ZA-LukeNeural"
+  },
+  "Vietnamese": {
+    "HoaiMy": "vi-VN-HoaiMyNeural",
+    "NamMinh": "vi-VN-NamMinhNeural"
+  }
+}
+# Text2text pipeline used to re-add tashkeel (Arabic diacritics) to input text.
+# It is built at import time, but is only called when language_code == "Arabic"
+# and the Tashkeel checkbox is ticked. language_dict has no "Arabic" entry at
+# the moment, so that branch is currently unreachable from the UI.
+pipe = pipeline("text2text-generation", model="mush42/fine-tashkeel")
+async def text_to_speech_edge(text, language_code, speaker, tashkeel_checkbox=False, speed=0, pitch=0):
+    """Synthesize ``text`` to an MP3 file with edge-tts.
+
+    Args:
+        text: Text to read aloud.
+        language_code: Key of ``language_dict`` (the language chosen in the UI).
+        speaker: Key of ``language_dict[language_code]`` (the chosen voice).
+        tashkeel_checkbox: When true and the language is "Arabic", strip the
+            existing diacritics and regenerate them with ``pipe`` first.
+        speed: Speech-rate adjustment in percent (0 keeps the default rate).
+        pitch: Pitch adjustment in Hz (0 keeps the default pitch).
+
+    Returns:
+        ``(text, tmp_path)``: the text that was actually spoken and the path
+        of the generated MP3 file.
+
+    Raises:
+        gr.Error: If the text is blank or the language/speaker pair is not in
+            ``language_dict``, so the UI shows a message instead of a raw
+            ``KeyError`` traceback.
+    """
+    if not text or not text.strip():
+        raise gr.Error("Vui lòng nhập văn bản cần chuyển thành giọng đọc.")
+    # Both dropdowns start empty, so the button can be pressed before a
+    # language or speaker has been chosen.
+    voices = language_dict.get(language_code)
+    if voices is None or speaker not in voices:
+        raise gr.Error("Vui lòng chọn ngôn ngữ và giọng đọc hợp lệ.")
+    # Remove diacritics from Arabic text then add tashkeel
+    if language_code == "Arabic" and tashkeel_checkbox:
+        text = pipe(araby.strip_diacritics(text))[0]["generated_text"].strip()
+    # Get the voice for the selected language and speaker
+    voice = voices[speaker]
+    # The "+d" format spec rejects floats, so coerce slider values to int.
+    speed, pitch = int(round(speed)), int(round(pitch))
+    rate_str = f"{speed:+d}%"
     pitch_str = f"{pitch:+d}Hz"
+    communicate = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str)
     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
+        tmp_path = tmp_file.name
+    # Save only after the handle is closed: re-opening a NamedTemporaryFile by
+    # name while it is still open is not possible on every platform.
+    await communicate.save(tmp_path)
+    return text, tmp_path
+def get_speakers(language):
+    """Return the speaker dropdown and Tashkeel checkbox updates for ``language``.
+
+    Args:
+        language: Key of ``language_dict``, or ``None`` / an unknown value when
+            the language dropdown has no valid selection.
+
+    Returns:
+        A ``(gr.Dropdown, gr.Checkbox)`` pair. For a known language the
+        dropdown lists its speakers with the first one preselected, and the
+        checkbox is visible only for "Arabic". Otherwise both controls are
+        returned empty/hidden and non-interactive, matching their initial state.
+    """
+    voices = language_dict.get(language)
+    if not voices:
+        # No valid language selected: reset instead of raising KeyError.
+        return gr.Dropdown(choices=[], value=None, interactive=False), gr.Checkbox(visible=False, interactive=False)
+    speakers = list(voices)
+    return gr.Dropdown(choices=speakers, value=speakers[0], interactive=True), gr.Checkbox(visible=language == "Arabic", interactive=True)
+# No language or speaker is preselected; the speaker list is filled in by
+# get_speakers once a language is chosen.
+default_language = None
+default_speaker = None
+with gr.Blocks(title="Inspire4DEV - Đọc Văn Bản") as demo:
+    gr.HTML("<center><h1>CHUYỂN VĂN BẢN THÀNH GIỌNG ĐỌC</h1></center>")
+    # Closing tags match their opening tags (previously both ended in </h3>).
+    gr.HTML(f"<h2 style='color:Tomato;'> {len(language_dict)} ngôn ngữ được hỗ trợ {', '.join(language_dict.keys())}</h2>")
+    gr.HTML(f"<p> {', '.join(language_dict.keys())} </p>")
+    with gr.Row():
+        # Left column: inputs and synthesis settings.
+        with gr.Column():
+            input_text = gr.Textbox(lines=5, label="Văn bản", placeholder="Hãy nhập văn bản cần chuyển thành giọng đọc")
+            language = gr.Dropdown(
+                choices=list(language_dict.keys()), value=default_language, label="Lựa chọn ngôn ngữ", interactive=True
+            )
+            speaker = gr.Dropdown(choices=[], value=default_speaker, label="Giọng Đọc", interactive=False)
+            tashkeel_checkbox = gr.Checkbox(label="Tashkeel", value=False, visible=False, interactive=False)
+            speedrate = gr.Slider(minimum=-50, maximum=50, value=0, label="Tốc độ đọc (%)", step=1)
+            pitchadj = gr.Slider(minimum=-20, maximum=20, value=0, label="Tông giọng (Hz)", step=1)
+            run_btn = gr.Button(value="TẠO GIỌNG ĐỌC", variant="primary")
+        # Right column: the spoken text and the generated audio.
+        with gr.Column():
+            output_text = gr.Textbox(label="Văn bản sau khi chuyển đổi")
+            output_audio = gr.Audio(type="filepath", label="Âm thanh")
+    # Choosing a language repopulates the speaker list and toggles the checkbox.
+    language.change(get_speakers, inputs=[language], outputs=[speaker, tashkeel_checkbox])
+    run_btn.click(text_to_speech_edge, inputs=[input_text, language, speaker, tashkeel_checkbox, speedrate, pitchadj], outputs=[output_text, output_audio])
 if __name__ == "__main__":
+    demo.queue().launch(share=False)