inspire4dev committed on
Commit
5d2d68e
·
verified ·
1 Parent(s): d81bde6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +95 -68
app.py CHANGED
@@ -1,79 +1,106 @@
1
- import gradio as gr
2
- import edge_tts
3
- import asyncio
4
  import tempfile
5
- import os
6
 
7
# Build the catalogue of voices reported by edge_tts
async def get_voices():
    """Return a mapping of display label to Edge TTS voice short name.

    Each label has the form ``"<ShortName> - <Locale> (<Gender>)"`` and maps
    to the ``ShortName`` of the same voice entry returned by
    ``edge_tts.list_voices()``.
    """
    catalogue = {}
    for entry in await edge_tts.list_voices():
        short_name = entry["ShortName"]
        label = f"{short_name} - {entry['Locale']} ({entry['Gender']})"
        catalogue[label] = short_name
    return catalogue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
# Text-to-speech function
async def text_to_speech(text, voice, rate, pitch):
    """Synthesize ``text`` to an MP3 file with Edge TTS.

    Args:
        text: Text to speak. ``None`` or whitespace-only input is rejected.
        voice: Voice label; the part before the first ``" - "`` is passed to
            Edge TTS as the voice name.
        rate: Speech-rate offset, formatted as a signed percentage.
        pitch: Pitch offset, formatted as a signed Hz value.

    Returns:
        ``(path, None)`` with the path of the generated MP3 on success, or
        ``(None, <result of gr.Warning(...)>)`` when validation fails.
    """
    if not text or not text.strip():
        return None, gr.Warning("Please enter text to convert.")
    if not voice:
        return None, gr.Warning("Please select a voice.")

    voice_short_name = voice.split(" - ")[0]
    # The ``d`` format code rejects floats, so coerce the values first.
    rate_str = f"{int(rate):+d}%"
    pitch_str = f"{int(pitch):+d}Hz"
    communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)

    # Only reserve a unique path here. The handle is closed before edge_tts
    # writes to the path, because an open handle can block a second writer
    # on some platforms (notably Windows).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
    await communicate.save(tmp_path)
    return tmp_path, None
27
 
28
# Synchronous wrapper used as the Gradio callback
def tts_interface(text, voice, rate, pitch):
    """Run ``text_to_speech`` to completion and return its two results."""
    outcome = asyncio.run(text_to_speech(text, voice, rate, pitch))
    audio_path, notice = outcome
    return audio_path, notice
32
 
33
- # Create Gradio application
34
- import gradio as gr
35
 
36
async def create_demo():
    """Build the Gradio interface for the Edge TTS demo.

    Awaits ``get_voices()`` to populate the voice dropdown and registers
    ``tts_interface`` as the callback.

    Returns:
        The configured ``gr.Interface``; launching is left to the caller.
    """
    voices = await get_voices()

    description = """
Convert text to speech using Microsoft Edge TTS. Adjust speech rate and pitch: 0 is default, positive values increase, negative values decrease.

🎥 **Exciting News: Introducing our Text-to-Video Converter!** 🎥

Take your content creation to the next level with our cutting-edge Text-to-Video Converter!
Transform your words into stunning, professional-quality videos in just a few clicks.

Features:
• Convert text to engaging videos with customizable visuals
Choose from 40+ languages and 300+ voices
Perfect for creating audiobooks, storytelling, and language learning materials
• Ideal for educators, content creators, and language enthusiasts

Ready to revolutionize your content? [Click here to try our Text-to-Video Converter now!](https://text2video.wingetgui.com/)
"""

    demo = gr.Interface(
        fn=tts_interface,
        inputs=[
            gr.Textbox(label="Input Text", lines=5),
            # The leading empty choice lets the form start with no voice selected.
            gr.Dropdown(choices=[""] + list(voices.keys()), label="Select Voice", value=""),
            gr.Slider(minimum=-50, maximum=50, value=0, label="Speech Rate Adjustment (%)", step=1),
            gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1),
        ],
        outputs=[
            gr.Audio(label="Generated Audio", type="filepath"),
            gr.Markdown(label="Warning", visible=False),
        ],
        title="Edge TTS Text-to-Speech",
        description=description,
        article="Experience the power of Edge TTS for text-to-speech conversion, and explore our advanced Text-to-Video Converter for even more creative possibilities!",
        analytics_enabled=False,
        # Gradio documents string modes for this option; "never" is the
        # string form of the boolean False that was passed before.
        allow_flagging="never",
    )
    return demo


# Run the application
if __name__ == "__main__":
    demo = asyncio.run(create_demo())
    demo.launch()
 
 
 
 
1
  import tempfile
 
2
 
3
+ import edge_tts
4
+ import gradio as gr
5
+ from transformers import pipeline
6
+ import pyarabic.araby as araby
7
+
8
# Edge TTS voices offered by the app, grouped by language. Every voice id has
# the form "<lang>-<REGION>-<Name>Neural", so the table is generated from
# (region, speaker names) groups. Group and name order define the insertion
# order of the resulting dictionaries.
_ENGLISH_VOICE_GROUPS = (
    ("US", ("Jenny", "Guy", "Ana", "Aria", "Christopher", "Eric", "Michelle", "Roger")),
    ("AU", ("Natasha", "William")),
    ("CA", ("Clara", "Liam")),
    ("GB", ("Libby", "Maisie", "Ryan", "Sonia", "Thomas")),
    ("HK", ("Sam", "Yan")),
    ("IE", ("Connor", "Emily")),
    ("IN", ("Neerja", "Prabhat")),
    ("KE", ("Asilia", "Chilemba")),
    ("NG", ("Abeo", "Ezinne")),
    ("NZ", ("Mitchell",)),
    ("PH", ("James", "Rosa")),
    ("SG", ("Luna", "Wayne")),
    ("TZ", ("Elimu", "Imani")),
    ("ZA", ("Leah", "Luke")),
)
_VIETNAMESE_VOICE_GROUPS = (
    ("VN", ("HoaiMy", "NamMinh")),
)


def _voice_table(lang, groups):
    """Map each speaker name to its ``<lang>-<REGION>-<Name>Neural`` voice id."""
    table = {}
    for region, names in groups:
        for name in names:
            table[name] = f"{lang}-{region}-{name}Neural"
    return table


language_dict = {
    "English": _voice_table("en", _ENGLISH_VOICE_GROUPS),
    "Vietnamese": _voice_table("vi", _VIETNAMESE_VOICE_GROUPS),
}
52
 
53
# Text2text pipeline used below to add tashkeel (diacritics) to Arabic input.
pipe = pipeline("text2text-generation", model="mush42/fine-tashkeel")


async def text_to_speech_edge(text, language_code, speaker, tashkeel_checkbox=False, speed=0, pitch=0):
    """Synthesize ``text`` to an MP3 file with the selected Edge TTS voice.

    Args:
        text: Text to speak; must contain non-whitespace characters.
        language_code: Key into ``language_dict`` (e.g. ``"English"``).
        speaker: Key into ``language_dict[language_code]``.
        tashkeel_checkbox: When true and ``language_code`` is ``"Arabic"``,
            existing diacritics are stripped and the text is re-diacritized
            with ``pipe`` before synthesis.
        speed: Speech-rate offset, formatted as a signed percentage.
        pitch: Pitch offset, formatted as a signed Hz value.

    Returns:
        ``(text, path)``: the text that was actually spoken and the path of
        the generated MP3 file.

    Raises:
        gr.Error: If the text is empty or the language/speaker pair is not
            present in ``language_dict``.
    """
    # Fail early with a readable message instead of a raw KeyError or a
    # failed synthesis request. Messages are in Vietnamese to match the UI.
    if not text or not text.strip():
        raise gr.Error("Vui lòng nhập văn bản.")
    voices = language_dict.get(language_code)
    if not voices or speaker not in voices:
        raise gr.Error("Vui lòng chọn ngôn ngữ và giọng đọc.")

    # Remove diacritics from Arabic text then add tashkeel
    if language_code == "Arabic" and tashkeel_checkbox:
        text = pipe(araby.strip_diacritics(text))[0]["generated_text"].strip()

    # Get the voice for the selected language and speaker
    voice = voices[speaker]
    # The ``d`` format code rejects floats, so coerce the values first.
    rate_str = f"{int(speed):+d}%"
    pitch_str = f"{int(pitch):+d}Hz"
    communicate = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str)

    # Only reserve a unique path here. The handle is closed before edge_tts
    # writes to the path, because an open handle can block a second writer
    # on some platforms (notably Windows).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
    await communicate.save(tmp_path)

    return text, tmp_path
 
 
 
72
 
 
 
73
 
74
def get_speakers(language):
    """Return updates for the speaker dropdown and the tashkeel checkbox.

    Args:
        language: Selected key of ``language_dict``; may be ``None`` or an
            unknown value when the language dropdown has no valid selection.

    Returns:
        A ``(gr.Dropdown, gr.Checkbox)`` pair. For a known language the
        dropdown lists its speakers with the first one preselected, and the
        checkbox is visible only for ``"Arabic"``. Otherwise the dropdown is
        empty and disabled and the checkbox is hidden.
    """
    speakers = list(language_dict.get(language, {}))
    if not speakers:
        # Nothing to choose from: present empty, disabled controls instead
        # of raising KeyError / IndexError.
        return (
            gr.Dropdown(choices=[], value=None, interactive=False),
            gr.Checkbox(visible=False, interactive=False),
        )
    return (
        gr.Dropdown(choices=speakers, value=speakers[0], interactive=True),
        gr.Checkbox(visible=language == "Arabic", interactive=True),
    )
78
+
79
+
80
# No language or speaker is preselected; the speaker list is filled in by
# get_speakers once a language is chosen.
default_language = None
default_speaker = None
with gr.Blocks(title="Inspire4DEV - Đọc Văn Bản") as demo:
    gr.HTML("<center><h1>CHUYỂN VĂN BẢN THÀNH GIỌNG ĐỌC</h1></center>")
    # Closing tags now match their opening tags (<h2>…</h2>, <p>…</p>).
    gr.HTML(f"<h2 style='color:Tomato;'> {len(language_dict)} ngôn ngữ được hỗ trợ {', '.join(language_dict.keys())}</h2>")
    gr.HTML(f"<p> {', '.join(language_dict.keys())} </p>")
    with gr.Row():
        # Left column: inputs and the trigger button.
        with gr.Column():
            input_text = gr.Textbox(lines=5, label="Văn bản", placeholder="Hãy nhập văn bản cần chuyển thành giọng đọc")
            language = gr.Dropdown(
                choices=list(language_dict.keys()), value=default_language, label="Lựa chọn ngôn ngữ", interactive=True
            )
            speaker = gr.Dropdown(choices=[], value=default_speaker, label="Giọng Đọc", interactive=False)
            tashkeel_checkbox = gr.Checkbox(label="Tashkeel", value=False, visible=False, interactive=False)
            speedrate = gr.Slider(minimum=-50, maximum=50, value=0, label="Tốc độ đọc (%)", step=1)
            pitchadj = gr.Slider(minimum=-20, maximum=20, value=0, label="Tông giọng (Hz)", step=1)
            run_btn = gr.Button(value="TẠO GIỌNG ĐỌC", variant="primary")

        # Right column: the text that was spoken and the generated audio.
        with gr.Column():
            output_text = gr.Textbox(label="Văn bản sau khi chuyển đổi")
            output_audio = gr.Audio(type="filepath", label="Âm thanh")

    # Event wiring is registered inside the Blocks context.
    language.change(get_speakers, inputs=[language], outputs=[speaker, tashkeel_checkbox])
    run_btn.click(text_to_speech_edge, inputs=[input_text, language, speaker, tashkeel_checkbox, speedrate, pitchadj], outputs=[output_text, output_audio])


if __name__ == "__main__":
    demo.queue().launch(share=False)