import asyncio
import os
import tempfile
from collections import defaultdict
from typing import Dict, Tuple

import edge_tts
import gradio as gr


def _format_signed(value: float, unit: str) -> str:
    """Return *value* as an explicitly signed integer followed by *unit*.

    Slider values may arrive as floats; rounding to an int avoids emitting
    strings such as ``+5.0%``, and the ``+`` format flag avoids ``+-5%``.
    """
    return f"{round(value):+d}{unit}"


async def text_to_speech(text: str, voice: str, rate: float, volume: float, pitch: float) -> str:
    """Synthesize *text* with edge_tts and return the path of the MP3 written.

    Args:
        text: Text to convert to speech.
        voice: Voice short name passed to ``edge_tts.Communicate``.
        rate: Speed adjustment, sent as a signed percentage.
        volume: Volume adjustment, sent as a signed percentage.
        pitch: Pitch adjustment, sent as a signed value in Hz.

    Returns:
        Path of a temporary ``.mp3`` file. The file is not deleted
        automatically; the caller owns it.
    """
    # Build the request first so that an invalid argument cannot leave an
    # empty temporary file behind.
    communicate = edge_tts.Communicate(
        text,
        voice,
        rate=_format_signed(rate, "%"),
        volume=_format_signed(volume, "%"),
        pitch=_format_signed(pitch, "Hz"),
    )

    # Only the name is needed; close the handle immediately so the descriptor
    # is not leaked and the path can be reopened for writing.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
        output_path = tmp.name

    try:
        await communicate.save(output_path)
    except BaseException:
        # Do not leave an orphaned file when synthesis fails or is cancelled.
        try:
            os.remove(output_path)
        except OSError:
            pass
        raise
    return output_path


async def list_voices() -> Dict[str, Dict]:
    """Fetch the edge_tts voices, keyed by each voice's ``ShortName``.

    Returns:
        Mapping ``short_name -> {'name': short_name, 'language': locale}``.
    """
    voices = await edge_tts.list_voices()
    return {
        v['ShortName']: {'name': v['ShortName'], 'language': v['Locale']}
        for v in voices
    }


def process_voices(voices: Dict[str, Dict]) -> Dict[str, Dict[str, str]]:
    """Group voices by language.

    Args:
        voices: Mapping as returned by ``list_voices``.

    Returns:
        Mapping ``language -> {speaker_name: full_voice_name}`` where the
        speaker name is the last dash-separated part of the voice name with
        ``Neural`` removed.
    """
    processed_voices: Dict[str, Dict[str, str]] = defaultdict(dict)
    for full_name, details in voices.items():
        # Take the last segment rather than index 2: names with an extra
        # region segment would otherwise yield the region, and names with
        # fewer segments would raise IndexError.
        speaker_name = full_name.rsplit('-', 1)[-1].replace('Neural', '')
        processed_voices[details['language']][speaker_name] = full_name
    return dict(processed_voices)


async def generate_speech(text_input, selected_language, selected_speaker, rate, volume, pitch, processed_voices):
    """Gradio click handler: synthesize the text with the selected voice.

    Args:
        text_input: Text typed by the user.
        selected_language: Language key into *processed_voices*.
        selected_speaker: Speaker key within the selected language.
        rate: Speed slider value.
        volume: Volume slider value.
        pitch: Pitch slider value.
        processed_voices: Mapping produced by ``process_voices``.

    Returns:
        Path of the generated audio file.

    Raises:
        gr.Error: If the text is empty or the language/speaker pair is unknown.
    """
    # The output component is an Audio widget, so a returned message string
    # would be treated as a file path; report problems through gr.Error.
    if not text_input or not text_input.strip():
        raise gr.Error("Por favor, introduce un texto.")

    try:
        selected_voice = processed_voices[selected_language][selected_speaker]
    except (KeyError, TypeError):
        raise gr.Error("La voz seleccionada no está disponible para este idioma.") from None

    return await text_to_speech(text_input, selected_voice, rate, volume, pitch)


async def main():
    """Build the Gradio interface and launch it."""
    voices = await list_voices()
    processed_voices = process_voices(voices)

    # Keep only languages whose code starts with "en" or "es".
    filtered_languages = [lang for lang in processed_voices if lang.startswith(('en', 'es'))]
    if not filtered_languages:
        raise RuntimeError("No English or Spanish voices are available.")

    # Prefer "es-US" as the default language, falling back to the first one.
    default_language = "es-US" if "es-US" in filtered_languages else filtered_languages[0]
    initial_speakers = list(processed_voices[default_language])
    # Select "Alonso" when available, otherwise the first speaker in the list.
    default_speaker = "Alonso" if "Alonso" in initial_speakers else initial_speakers[0]

    with gr.Blocks() as demo:
        gr.Markdown("hablandose bien, chequear e ir probando con texto corto la velocidad antes de introducir mas ")

        with gr.Row():
            text_input = gr.Textbox(label="Ingresar texto a convertir a voz:", lines=5)
            audio_output = gr.Audio(label="Salida de voz")

        with gr.Row():
            selected_language = gr.Dropdown(
                filtered_languages,
                label="Selecciona el idioma:",
                value=default_language,
            )
            selected_speaker = gr.Dropdown(
                initial_speakers,
                label="Selecciona la persona:",
                value=default_speaker,
            )

        with gr.Row():
            rate_slider = gr.Slider(minimum=-20, maximum=20, value=0, step=1, label="Velocidad (%)")
            volume_slider = gr.Slider(minimum=0, maximum=100, value=0, step=1, label="Volumen (%)")
            pitch_slider = gr.Slider(minimum=-20, maximum=20, value=0, step=1, label="Tono (Hz)")

        btn = gr.Button("Generar voz")

        # Refresh the speaker dropdown when the language changes.
        def update_speakers(selected_language):
            speakers = list(processed_voices.get(selected_language, {}))
            # Reset the value as well as the choices; otherwise the previous
            # language's speaker stays selected and the voice lookup fails.
            return gr.Dropdown(choices=speakers, value=speakers[0] if speakers else None)

        selected_language.change(update_speakers, inputs=selected_language, outputs=selected_speaker)

        btn.click(
            generate_speech,
            inputs=[
                text_input,
                selected_language,
                selected_speaker,
                rate_slider,
                volume_slider,
                pitch_slider,
                gr.State(processed_voices),
            ],
            outputs=audio_output,
        )

    demo.launch()

# Script entry point: run the async application when executed directly.
if __name__ == "__main__":
    asyncio.run(main())