|
import gradio as gr |
|
from gtts import gTTS |
|
from io import BytesIO |
|
import numpy as np |
|
from pydub import AudioSegment |
|
import tempfile |
|
|
|
def text_to_speech(text, language):
    """Synthesize *text* with gTTS and return values for the two UI outputs.

    The result is ordered to match the interface's outputs: first the value
    for the audio component, then the value for the error-message label.

    Args:
        text: The text to speak. Empty or whitespace-only text is rejected.
        language: Language code handed to gTTS as ``lang`` (e.g. ``"en"``).

    Returns:
        A 2-tuple ``(audio, message)``:

        * on success, ``audio`` is ``(sample_rate, samples)`` -- the layout a
          ``gr.Audio(type="numpy")`` output expects -- and ``message`` is
          ``None``;
        * on failure, ``audio`` is ``None`` and ``message`` is a string
          describing what went wrong.
    """

    def failure(message):
        # Keep the console log and surface the same text in the error label.
        print(message)
        return None, message

    if not text or not text.strip():
        return failure("No text provided")
    if not language:
        return failure("No language selected")

    try:
        # Synthesize into memory instead of a NamedTemporaryFile: reopening a
        # still-open temp file by name fails on Windows.
        mp3_buffer = BytesIO()
        gTTS(text=text, lang=language).write_to_fp(mp3_buffer)
        mp3_buffer.seek(0)
        sound = AudioSegment.from_file(mp3_buffer, format="mp3")
        samples = np.array(sound.get_array_of_samples())
    except Exception as e:
        # UI boundary: report any synthesis/decoding problem to the user
        # rather than letting the request crash.
        return failure(f"Error: {str(e)}")

    if samples.size == 0:
        return failure("No audio data generated")

    if sound.channels > 1:
        # Samples are interleaved per channel; reshape to (frames, channels).
        samples = samples.reshape((-1, sound.channels))

    return (sound.frame_rate, samples), None
|
|
|
# Input widgets: a two-line text box and a language selector.
text_input = gr.Textbox(lines=2, placeholder="Type your text here...")
language_input = gr.Radio(choices=["en", "es", "de", "fr", "it"], label="Language")

# Output widgets: the audio player and a label for error messages.
audio_output = gr.Audio(type="numpy", label="Output Audio")
error_output = gr.Label(label="Error Messages")

# Wire the widgets to the text_to_speech callback.
interface = gr.Interface(
    fn=text_to_speech,
    inputs=[text_input, language_input],
    outputs=[audio_output, error_output],
    title="Text to Speech Converter",
    description="Select text and language, and click submit to convert text to speech.",
)


if __name__ == "__main__":
    # Launch the web UI only when this file is executed as a script.
    interface.launch()