"""Gradio front end that exposes ``voice_processing.tts`` as a web interface."""

import asyncio
import base64
import os
from io import BytesIO

import gradio as gr
import numpy as np
from scipy.io import wavfile

from voice_processing import tts, get_model_names, voice_mapping

# Upper bound, in seconds, on a single TTS request. Adjust as needed.
TTS_TIMEOUT_SECONDS = 60


def _read_uploaded_voice(voice_upload):
    """Return the raw bytes of the uploaded voice sample."""
    # Accept both an object exposing ``.name`` (a temp-file wrapper) and a
    # plain path string, since the File component may hand over either.
    upload_path = getattr(voice_upload, "name", voice_upload)
    with open(upload_path, "rb") as f:
        return f.read()


def _encode_wav_data_uri(tgt_sr, audio_output):
    """Return ``audio_output`` as a base64 ``data:audio/wav`` URI.

    A NumPy array is cast to int16 and written as a WAV file at sample rate
    ``tgt_sr``; any other value is base64-encoded as-is.
    """
    if isinstance(audio_output, np.ndarray):
        wav_buffer = BytesIO()
        wavfile.write(wav_buffer, tgt_sr, audio_output.astype(np.int16))
        audio_bytes = wav_buffer.getvalue()
    else:
        audio_bytes = audio_output
    return f"data:audio/wav;base64,{base64.b64encode(audio_bytes).decode('utf-8')}"


def _remove_temp_file(path):
    """Best-effort removal of the temporary EdgeTTS output file."""
    if not path:
        return
    try:
        os.remove(path)
    except FileNotFoundError:
        pass  # Nothing to clean up.
    except OSError as e:
        # A failed cleanup must not turn a successful conversion into an error.
        print(f"Could not remove temporary file {path}: {e}")


async def convert_tts(model_name, tts_text, selected_voice, slang_rate,
                      use_uploaded_voice, voice_upload):
    """Synthesize ``tts_text`` and return the result for the Gradio outputs.

    Args:
        model_name: Model name, forwarded to ``tts``.
        tts_text: Text to synthesize.
        selected_voice: Key into ``voice_mapping`` selecting the EdgeTTS voice.
        slang_rate: Value of the "Slang Rate" slider, forwarded to ``tts``.
        use_uploaded_voice: Whether the uploaded voice file should be used.
        voice_upload: Uploaded file (object with ``.name`` or a path), or None.

    Returns:
        A ``(info, audio)`` pair. On success ``info`` is ``{"info": ...}`` and
        ``audio`` is a base64 WAV data URI. On failure ``info`` is a dict with
        an ``"error"`` key and ``audio`` is None.
    """
    edge_output_filename = None
    try:
        edge_tts_voice = voice_mapping.get(selected_voice)
        if not edge_tts_voice:
            return {"error": f"Invalid voice '{selected_voice}'."}, None

        voice_upload_file = None
        if use_uploaded_voice and voice_upload is not None:
            voice_upload_file = _read_uploaded_voice(voice_upload)

        info, edge_output_filename, tts_output_data = await asyncio.wait_for(
            tts(model_name, tts_text, edge_tts_voice, slang_rate,
                use_uploaded_voice, voice_upload_file),
            timeout=TTS_TIMEOUT_SECONDS,
        )

        if isinstance(info, dict) and "error" in info:
            return info, None

        tgt_sr, audio_output = tts_output_data
        audio_data_uri = _encode_wav_data_uri(tgt_sr, audio_output)
        return {"info": info}, audio_data_uri

    except asyncio.TimeoutError:
        return {"error": "Operation timed out"}, None
    except asyncio.CancelledError:
        return {"error": "Operation was cancelled"}, None
    except Exception as e:
        print(f"Error in convert_tts: {str(e)}")
        return {"error": str(e)}, None
    finally:
        # Clean up the temporary EdgeTTS output file on every exit path, not
        # only after a fully successful conversion.
        _remove_temp_file(edge_output_filename)


def get_models():
    """Return the model names offered in the "Model" dropdown."""
    return get_model_names()


def get_voices():
    """Return the voice names (keys of ``voice_mapping``) for the dropdown."""
    return list(voice_mapping.keys())


# NOTE(review): confirm that the installed Gradio version's ``queue()`` accepts
# ``concurrency_limit`` and ``max_batch_size``; left unchanged here.
iface = gr.Interface(
    fn=convert_tts,
    inputs=[
        gr.Dropdown(choices=get_models(), label="Model", interactive=True),
        gr.Textbox(label="Text", placeholder="Enter text here"),
        gr.Dropdown(choices=get_voices(), label="Voice", interactive=True),
        gr.Slider(minimum=0, maximum=1, step=0.01, label="Slang Rate"),
        gr.Checkbox(label="Use Uploaded Voice"),
        gr.File(label="Voice File"),
    ],
    outputs=[
        gr.JSON(label="Info"),
        gr.Audio(label="Generated Audio", type="numpy"),
    ],
    title="Text-to-Speech Conversion",
).queue(concurrency_limit=6, max_batch_size=1)

iface.launch()