# File size: 1,497 Bytes
# 6cfc17c 2787f80 56dc038 6cfc17c be9e916 c9718e5 be9e916 c9718e5 be9e916 6dc4c99 be9e916 6dc4c99 be9e916 c9718e5 be9e916 c9718e5 be9e916 c9718e5 56dc038 6cfc17c 1a3caf9 6cfc17c 56dc038 6cfc17c 3e062e8
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41
import gradio as gr
from gtts import gTTS
from io import BytesIO
import numpy as np
from pydub import AudioSegment
import tempfile
def _failure(message):
    """Log *message* to the console and return the ``(audio, message)`` failure pair."""
    print(message)
    return None, message


def text_to_speech(text, language):
    """Synthesize *text* with gTTS and return one value per Gradio output.

    The function always returns a 2-tuple ``(audio, message)``:

    * ``audio`` is ``(sample_rate, samples)`` -- the order a numpy-typed
      ``gr.Audio`` output expects -- or ``None`` when nothing was synthesized.
    * ``message`` is a human-readable error string, or ``None`` on success.

    Args:
        text: The text to speak. ``None``, empty, or whitespace-only text is
            rejected before any synthesis is attempted.
        language: A gTTS language code such as ``"en"``. ``None`` or empty
            when no language has been selected.

    Returns:
        ``(audio, message)`` as described above. Failures are reported through
        ``message`` (and printed to the console); no exception is raised.
    """
    if not text or not text.strip():
        return _failure("No text provided")
    if not language:
        return _failure("No language selected")

    try:
        # Synthesize into memory. Re-opening a still-open NamedTemporaryFile
        # by name is not portable (it fails on Windows), and no file on disk
        # is needed here.
        mp3_buffer = BytesIO()
        gTTS(text=text, lang=language).write_to_fp(mp3_buffer)
        mp3_buffer.seek(0)
        sound = AudioSegment.from_file(mp3_buffer, format="mp3")
        samples = np.array(sound.get_array_of_samples())
    except Exception as e:
        # UI boundary: surface any synthesis/decoding failure as a message
        # instead of crashing the request.
        return _failure(f"Error: {e}")

    if samples.size == 0:
        return _failure("No audio data generated")

    # pydub interleaves channels in one flat array; reshape so each row is
    # one frame and each column one channel.
    if sound.channels > 1:
        samples = samples.reshape((-1, sound.channels))

    return (sound.frame_rate, samples), None
# Gradio UI definition: a text box and a language picker feed
# text_to_speech; its results go to an audio player and an error label.
interface = gr.Interface(
    title="Text to Speech Converter",
    description="Select text and language, and click submit to convert text to speech.",
    fn=text_to_speech,
    inputs=[
        gr.Textbox(lines=2, placeholder="Type your text here..."),
        gr.Radio(choices=["en", "es", "de", "fr", "it"], label="Language"),
    ],
    outputs=[
        gr.Audio(type="numpy", label="Output Audio"),
        gr.Label(label="Error Messages"),
    ],
)
if __name__ == "__main__":
    # Launch the Gradio app only when executed as a script, not on import.
    interface.launch()