# imagRaf / app.py
# Last change: "Update app.py" by RaF33 (commit c9718e5, verified); file size 1.5 kB.
import gradio as gr
from gtts import gTTS
from io import BytesIO
import numpy as np
from pydub import AudioSegment
import tempfile
def text_to_speech(text, language):
    """Convert text to speech and return values for the two interface outputs.

    The text is synthesised with gTTS into an in-memory MP3 buffer, decoded
    with pydub, and exposed as a NumPy array of samples.

    Args:
        text: The text to speak. Empty or missing text is rejected.
        language: Language code forwarded to gTTS as ``lang`` (e.g. ``"en"``).
            An empty or missing value is rejected.

    Returns:
        A pair ``(audio, message)``, one value per interface output, in order:

        * ``audio`` is ``(sample_rate, samples)`` -- the layout that
          ``gr.Audio(type="numpy")`` expects -- or ``None`` when no audio
          could be produced. Multi-channel audio is shaped
          ``(frames, channels)``.
        * ``message`` describes what went wrong, or is ``None`` on success.

    No exception escapes this function: any failure during synthesis or
    decoding is reported through ``message`` instead.
    """
    # Validate inputs first so no network request is made for unusable input.
    if not text:
        message = "No text provided"
    elif not language:
        message = "No language selected"
    else:
        message = None
    if message is not None:
        print(message)
        return None, message

    try:
        # Keep the MP3 in memory: reopening a still-open NamedTemporaryFile
        # by name is not portable (it fails on Windows).
        mp3_buffer = BytesIO()
        gTTS(text=text, lang=language).write_to_fp(mp3_buffer)
        mp3_buffer.seek(0)
        sound = AudioSegment.from_file(mp3_buffer, format="mp3")
        samples = np.array(sound.get_array_of_samples())
    except Exception as e:
        # UI boundary: report the failure to the user rather than crash the
        # request. The message is also printed so it lands in the server log.
        message = f"Error: {str(e)}"
        print(message)
        return None, message

    if samples.size == 0:
        message = "No audio data generated"
        print(message)
        return None, message

    # pydub returns interleaved samples; split them into one column per
    # channel for multi-channel audio.
    if sound.channels > 1:
        samples = samples.reshape((-1, sound.channels))

    # Sample rate comes first: that is the order gr.Audio(type="numpy") expects.
    return (sound.frame_rate, samples), None
# Gradio UI: a text box plus a language picker feed text_to_speech; its
# results are routed, in order, to the audio player and the message label.
interface = gr.Interface(
    title="Text to Speech Converter",
    description="Select text and language, and click submit to convert text to speech.",
    fn=text_to_speech,
    inputs=[
        gr.Textbox(lines=2, placeholder="Type your text here..."),
        gr.Radio(choices=['en', 'es', 'de', 'fr', 'it'], label="Language"),
    ],
    outputs=[
        gr.Audio(type="numpy", label="Output Audio"),
        gr.Label(label="Error Messages"),
    ],
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()