import gradio as gr
from gtts import gTTS
from io import BytesIO
import numpy as np
from pydub import AudioSegment
import tempfile

def _tts_failure(message):
    """Log *message* to the console and return the "no audio" output pair."""
    print(message)
    return None, message


def text_to_speech(text, language):
    """Convert *text* to speech and return one value per UI output.

    The surrounding interface declares two outputs (an audio player and a
    message label), so exactly two values are returned:

    * audio: ``(sample_rate, samples)`` as expected by a numpy-typed Gradio
      audio component, or ``None`` when nothing could be synthesized;
    * message: a human-readable error string, or ``None`` on success.

    Args:
        text: The text to speak. Empty or whitespace-only text is rejected.
        language: Language code passed to gTTS (e.g. ``"en"``).

    Returns:
        A ``(audio, message)`` tuple as described above. The function never
        raises for synthesis/decoding problems; they are reported through
        ``message`` (and echoed to the console) instead.
    """
    if not text or (isinstance(text, str) and not text.strip()):
        return _tts_failure("No text provided")
    if not language:
        return _tts_failure("No language selected")

    try:
        # Synthesize into memory rather than a named temporary file: a
        # NamedTemporaryFile cannot be reopened by name on Windows while it
        # is still open, and no cleanup is needed for an in-memory buffer.
        mp3_buffer = BytesIO()
        gTTS(text=text, lang=language).write_to_fp(mp3_buffer)
        mp3_buffer.seek(0)
        sound = AudioSegment.from_file(mp3_buffer, format="mp3")
        samples = np.array(sound.get_array_of_samples())
    except Exception as e:
        # UI boundary: network, language-code and decoder failures are all
        # reported to the user instead of crashing the request.
        return _tts_failure(f"Error: {str(e)}")

    if samples.size == 0:
        return _tts_failure("No audio data generated")
    if sound.channels > 1:
        # pydub interleaves channels; expose them as (frames, channels).
        samples = samples.reshape((-1, sound.channels))
    return (sound.frame_rate, samples), None

# Build the web UI: a text box and a language picker feed text_to_speech,
# whose results are shown in an audio player and a label component.
interface = gr.Interface(
    title="Text to Speech Converter",
    description="Select text and language, and click submit to convert text to speech.",
    fn=text_to_speech,
    inputs=[
        gr.Textbox(lines=2, placeholder="Type your text here..."),
        gr.Radio(choices=["en", "es", "de", "fr", "it"], label="Language"),
    ],
    outputs=[
        gr.Audio(type="numpy", label="Output Audio"),
        gr.Label(label="Error Messages"),
    ],
)

# Start the server only when executed as a script, not when imported.
if __name__ == "__main__":
    interface.launch()