import edge_tts
import gradio as gr
import tempfile

# Maps the label shown in the voice dropdown to the edge-tts voice identifier
# that is passed to the synthesizer.
language_dict = {
    "English-Jenny (Female)": "en-US-JennyNeural",
}

async def text_to_speech_edge(text, language_code, rate, volume, pitch):
    """Synthesize *text* with edge-tts and save the audio to a temporary file.

    Args:
        text: Text to speak. Must contain at least one non-whitespace
            character.
        language_code: A key of ``language_dict`` (the label chosen in the
            voice dropdown). A missing or unknown label falls back to
            ``en-US-JennyNeural``.
        rate: Signed speaking-rate offset, sent to edge-tts as a percentage.
        volume: Signed volume offset, sent to edge-tts as a percentage.
        pitch: Signed pitch offset, sent to edge-tts in Hz.

    Returns:
        A ``(message, path)`` tuple: a status message echoing *text*, and the
        path of the audio file that was written. The file is created with
        ``delete=False``, so it stays on disk after this function returns.

    Raises:
        gr.Error: If *text* is empty or contains only whitespace.
    """
    if not text or not text.strip():
        # Surface a readable message in the UI rather than attempting a
        # synthesis request with nothing to say.
        raise gr.Error("Please enter some text to synthesize.")

    # The fallback must be a real voice name: the dropdown starts with no
    # selection, so ``language_code`` can be None on the first submit.
    voice = language_dict.get(language_code, "en-US-JennyNeural")

    # edge-tts expects explicitly signed integer offsets such as "+0%" or
    # "-20Hz"; int() guards against sliders delivering floats like 5.0.
    rates = f"{int(rate):+d}%"
    volumes = f"{int(volume):+d}%"
    pitchs = f"{int(pitch):+d}Hz"

    communicate = edge_tts.Communicate(
        text, voice, rate=rates, volume=volumes, pitch=pitchs, proxy=None
    )

    # Only the unique path is needed here. Close our handle before edge-tts
    # opens the file for writing. edge-tts produces MP3 data, hence the suffix.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name

    await communicate.save(tmp_path)
    return f"Speech synthesis completed for: {text}", tmp_path

# --- UI components -----------------------------------------------------------
# Components are created in the same order as they appear in the form.
input_text = gr.Textbox(label="Input Text", lines=5)
output_text = gr.Textbox(label="Output Text")
output_audio = gr.Audio(label="Exported Audio", type="filepath")
language = gr.Dropdown(
    label="Choose the Voice Model",
    choices=list(language_dict.keys()),
)

# The three prosody sliders share the same range, step and default; only the
# caption differs.
rate, volume, pitch = [
    gr.Slider(
        minimum=-100,
        maximum=100,
        step=1,
        value=0,
        label=caption,
        info=caption,
        interactive=True,
    )
    for caption in ("Rate", "Volume", "Pitch")
]

# --- Wire the components to the synthesis function and start the app ---------
interface = gr.Interface(
    fn=text_to_speech_edge,
    inputs=[input_text, language, rate, volume, pitch],
    outputs=[output_text, output_audio],
    title="Edge-TTS",
    description="Microsoft Edge Text-To-Speech (Created by Yash Chouhan)",
)

# share=True asks Gradio to create a public share link in addition to the
# local server.
interface.launch(share=True)