import asyncio
import base64
import os
from io import BytesIO

import gradio as gr
import numpy as np
from scipy.io import wavfile

from voice_processing import tts, get_model_names, voice_mapping

async def convert_tts(model_name, tts_text, selected_voice, slang_rate, use_uploaded_voice, voice_upload):
    """Synthesize speech for ``tts_text`` and return it as a base64 WAV data URI.

    Args:
        model_name: Forwarded unchanged to ``tts``.
        tts_text: Text forwarded to ``tts``.
        selected_voice: Key looked up in ``voice_mapping``; the mapped value
            is what gets passed to ``tts``.
        slang_rate: Forwarded unchanged to ``tts``.
        use_uploaded_voice: When truthy and ``voice_upload`` is not ``None``,
            the uploaded file is read and its bytes are passed to ``tts``.
        voice_upload: The uploaded file, either an object exposing its path
            as ``.name`` or a plain path, or ``None``.

    Returns:
        A ``(payload, audio)`` pair. On success ``payload`` is
        ``{"info": info}`` and ``audio`` is a ``data:audio/wav;base64,...``
        string. On failure ``payload`` is a dict with an ``"error"`` key and
        ``audio`` is ``None``. Errors are reported through the return value,
        not raised.
    """
    # Tracked outside the try so the finally clause can clean it up on every
    # exit path once tts() has reported it.
    edge_output_filename = None
    try:
        edge_tts_voice = voice_mapping.get(selected_voice)
        if not edge_tts_voice:
            return {"error": f"Invalid voice '{selected_voice}'."}, None

        voice_upload_file = None
        if use_uploaded_voice and voice_upload is not None:
            # Accept both an upload object carrying its path in .name and a bare path.
            upload_path = getattr(voice_upload, "name", voice_upload)
            with open(upload_path, "rb") as f:
                voice_upload_file = f.read()

        info, edge_output_filename, tts_output_data = await asyncio.wait_for(
            tts(model_name, tts_text, edge_tts_voice, slang_rate, use_uploaded_voice, voice_upload_file),
            timeout=60,  # Adjust timeout as needed
        )

        if isinstance(info, dict) and "error" in info:
            return info, None

        tgt_sr, audio_output = tts_output_data

        if isinstance(audio_output, np.ndarray):
            # Serialize the samples as an in-memory WAV file.
            byte_io = BytesIO()
            wavfile.write(byte_io, tgt_sr, audio_output.astype(np.int16))
            audio_bytes = byte_io.getvalue()
        else:
            # Anything that is not an ndarray is base64-encoded as-is.
            audio_bytes = audio_output

        encoded_audio = base64.b64encode(audio_bytes).decode("utf-8")
        return {"info": info}, f"data:audio/wav;base64,{encoded_audio}"

    except asyncio.TimeoutError:
        return {"error": "Operation timed out"}, None
    except asyncio.CancelledError:
        # Cancellation is reported as an error payload rather than propagated.
        return {"error": "Operation was cancelled"}, None
    except Exception as e:
        print(f"Error in convert_tts: {e}")
        return {"error": str(e)}, None
    finally:
        # Clean up the temporary EdgeTTS output file if it exists. A failed
        # removal is logged but must not discard an already-computed result.
        if edge_output_filename and os.path.exists(edge_output_filename):
            try:
                os.remove(edge_output_filename)
            except OSError as cleanup_error:
                print(f"Error in convert_tts: could not remove {edge_output_filename}: {cleanup_error}")

def get_models():
    """Return whatever ``get_model_names()`` reports, unchanged."""
    available_models = get_model_names()
    return available_models

def get_voices():
    """Return the keys of ``voice_mapping`` as a new list."""
    voice_names = [*voice_mapping.keys()]
    return voice_names

# Web UI for convert_tts: the six inputs are listed in the same order as the
# function's parameters, and the two outputs match its (payload, audio) return.
iface = gr.Interface(
    fn=convert_tts,
    inputs=[
        gr.Dropdown(choices=get_models(), label="Model", interactive=True),
        gr.Textbox(label="Text", placeholder="Enter text here"),
        gr.Dropdown(choices=get_voices(), label="Voice", interactive=True),
        gr.Slider(minimum=0, maximum=1, step=0.01, label="Slang Rate"),
        gr.Checkbox(label="Use Uploaded Voice"),
        gr.File(label="Voice File"),
    ],
    outputs=[
        gr.JSON(label="Info"),
        # NOTE(review): convert_tts returns a data-URI string for this output
        # while the component is declared with type="numpy" -- confirm the
        # component handles that value as intended.
        gr.Audio(label="Generated Audio", type="numpy"),
    ],
    title="Text-to-Speech Conversion",
    # These limits are Interface arguments; queue() does not accept
    # `concurrency_limit` or `max_batch_size` keywords.
    concurrency_limit=6,
    # NOTE(review): presumably only relevant when batch=True -- confirm.
    max_batch_size=1,
).queue()

iface.launch()