"""Gradio app that converts typed text to speech using gTTS."""

import tempfile
from io import BytesIO

import gradio as gr
import numpy as np
from gtts import gTTS
from pydub import AudioSegment


def _synthesize(text, language):
    """Return ``(sample_rate, samples)`` for *text* spoken in *language*.

    The MP3 produced by gTTS is kept in memory rather than written to a
    named temporary file: reopening a still-open ``NamedTemporaryFile`` by
    name does not work on every platform (notably Windows).
    """
    mp3_buffer = BytesIO()
    gTTS(text=text, lang=language).write_to_fp(mp3_buffer)
    mp3_buffer.seek(0)

    sound = AudioSegment.from_file(mp3_buffer, format="mp3")
    samples = np.array(sound.get_array_of_samples())
    if sound.channels == 2:
        # pydub interleaves stereo samples; expose them as (frames, 2).
        samples = samples.reshape((-1, 2))
    return sound.frame_rate, samples


def text_to_speech(text, language):
    """Convert *text* to speech for the Gradio interface.

    Args:
        text: The text to speak. Empty or whitespace-only text is rejected.
        language: A gTTS language code (e.g. ``"en"``).

    Returns:
        A 2-tuple matching the interface's two outputs:
        ``(audio, message)``. On success ``audio`` is
        ``(sample_rate, samples)`` -- the order ``gr.Audio(type="numpy")``
        expects -- and ``message`` is ``None``. On failure ``audio`` is
        ``None`` and ``message`` describes the problem.
    """
    if not text or not text.strip():
        print("No text provided")
        return None, "No text provided"
    if not language:
        print("No language selected")
        return None, "No language selected"

    try:
        frame_rate, samples = _synthesize(text, language)
    except Exception as e:
        # UI boundary: report the failure in the interface instead of
        # crashing the request (gTTS needs network access and can fail).
        message = f"Error: {str(e)}"
        print(message)
        return None, message

    if samples.size == 0:
        print("No audio data generated")
        return None, "No audio data generated"

    return (frame_rate, samples), None


interface = gr.Interface(
    fn=text_to_speech,
    inputs=[
        gr.Textbox(lines=2, placeholder="Type your text here..."),
        gr.Radio(choices=['en', 'es', 'de', 'fr', 'it'], label="Language"),
    ],
    outputs=[
        gr.Audio(type="numpy", label="Output Audio"),
        gr.Label(label="Error Messages"),
    ],
    title="Text to Speech Converter",
    description="Select text and language, and click submit to convert text to speech.",
)

if __name__ == "__main__":
    interface.launch()