"""Gradio front-end for Microsoft Edge text-to-speech via ``edge_tts``.

Builds a small web form (text, voice, rate, volume, pitch), synthesizes the
text with the selected voice and returns a status message together with the
path of the generated audio file.
"""

import contextlib
import os
import tempfile

import edge_tts
import gradio as gr

# Maps the label shown in the voice dropdown to the edge-tts voice name.
language_dict = {
    'English-Jenny (Female)': 'en-US-JennyNeural',
}

# Voice used when the dropdown value is missing or not in ``language_dict``.
# It must be a real voice name; a placeholder string is not a usable voice.
DEFAULT_VOICE = 'en-US-JennyNeural'


def _signed(value: float, unit: str) -> str:
    """Format *value* as an explicitly signed integer followed by *unit*.

    Zero and positive values get a leading ``+`` (``0`` -> ``"+0%"``),
    negative values keep their ``-``. The value is rounded to an integer so
    that a float such as ``5.0`` renders as ``"+5%"`` rather than ``"+5.0%"``.
    """
    return f"{round(value):+d}{unit}"


async def text_to_speech_edge(
    text: str,
    language_code: str,
    rate: float,
    volume: float,
    pitch: float,
) -> tuple[str, str]:
    """Synthesize *text* to an audio file and return ``(message, file_path)``.

    Args:
        text: Text to speak.
        language_code: A key of ``language_dict`` (the dropdown label). Unknown
            or missing values fall back to ``DEFAULT_VOICE``.
        rate: Speaking-rate offset, sent to edge-tts as a signed percentage.
        volume: Volume offset, sent to edge-tts as a signed percentage.
        pitch: Pitch offset, sent to edge-tts as a signed value in Hz.

    Returns:
        A status message and the path of the temporary audio file. The file is
        intentionally not deleted on success because the caller (the Gradio
        audio component) reads it after this function returns.

    Raises:
        gr.Error: If *text* is empty or contains only whitespace.
    """
    if not text or not text.strip():
        # Fail early with a message the UI can display instead of making a
        # network request that has nothing to synthesize.
        raise gr.Error("Please enter some text to synthesize.")

    voice = language_dict.get(language_code, DEFAULT_VOICE)
    communicate = edge_tts.Communicate(
        text,
        voice,
        rate=_signed(rate, "%"),
        volume=_signed(volume, "%"),
        pitch=_signed(pitch, "Hz"),
        proxy=None,
    )

    # Only reserve a unique path here; edge-tts writes the data itself.
    # edge-tts produces MP3 audio, so the suffix is ".mp3" to match the content.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name

    try:
        await communicate.save(tmp_path)
    except BaseException:
        # Synthesis failed or was cancelled: do not leave an empty or partial
        # file behind, then let the original error propagate.
        with contextlib.suppress(OSError):
            os.remove(tmp_path)
        raise

    return f"Speech synthesis completed for: {text}", tmp_path


# --- UI components -----------------------------------------------------------
input_text = gr.Textbox(lines=5, label="Input Text")
output_text = gr.Textbox(label="Output Text")
output_audio = gr.Audio(type="filepath", label="Exported Audio")
language = gr.Dropdown(
    choices=list(language_dict.keys()),
    label="Choose the Voice Model",
)
rate = gr.Slider(
    -100, 100, step=1, value=0, label="Rate", info="Rate", interactive=True
)
volume = gr.Slider(
    -100, 100, step=1, value=0, label="Volume", info="Volume", interactive=True
)
pitch = gr.Slider(
    -100, 100, step=1, value=0, label="Pitch", info="Pitch", interactive=True
)

interface = gr.Interface(
    fn=text_to_speech_edge,
    inputs=[input_text, language, rate, volume, pitch],
    outputs=[output_text, output_audio],
    title="Edge-TTS",
    description="Microsoft Edge Text-To-Speech (Created by Yash Chouhan)",
)

# Launched at module level, as in the original script; share=True requests a
# public Gradio share link in addition to the local server.
interface.launch(share=True)