# text_to_speech / app.py
# sam6309's picture
# Create app.py
# 36aedbe verified
import streamlit as st
from transformers import pipeline
from gtts import gTTS
from io import BytesIO
import base64
# Define a function to generate text-to-speech audio
def generate_tts(
    text: str,
    voice_type: str,
    language: str,
    english_model_id: str = "facebook/mms-tts-eng",
) -> bytes:
    """Synthesize speech for *text* and return it as encoded audio bytes.

    Args:
        text: Text to speak. Must contain at least one non-whitespace
            character.
        voice_type: Voice style chosen by the caller. Accepted for interface
            compatibility but currently unused: neither backend call below
            is given a voice-style option.
        language: ``"English"`` (Transformers text-to-speech pipeline) or
            ``"Urdu"`` (gTTS).
        english_model_id: Hub id of the checkpoint handed to the pipeline
            for English.

    Returns:
        WAV-encoded bytes for English, MP3-encoded bytes for Urdu.

    Raises:
        ValueError: If *text* is blank or *language* is not supported.
    """
    # Validate up front so bad input fails with a clear message instead of a
    # backend-specific error (or a silent ``None`` for an unknown language).
    if not text or not text.strip():
        raise ValueError("Text must not be empty.")
    if language not in ("English", "Urdu"):
        raise ValueError(f"Unsupported language: {language!r}")

    if language == "English":
        # NOTE(review): the previous checkpoint, "facebook/fastspeech2-en-ljspeech",
        # is published for fairseq rather than as a transformers checkpoint,
        # so a transformers-format VITS model is the default here -- confirm
        # the voice is acceptable, or pass another id via english_model_id.
        # NOTE: the pipeline is rebuilt on every call, as in the original.
        tts_pipeline = pipeline("text-to-speech", model=english_model_id)
        # The pipeline result is a mapping holding the waveform under "audio"
        # and its rate under "sampling_rate"; it has no ``.audio`` attribute
        # and the call does not take ``return_tensors``.
        result = tts_pipeline(text)
        return _waveform_to_wav_bytes(result["audio"], result["sampling_rate"])

    # Urdu: gTTS writes MP3 data into any file-like object.
    mp3_buffer = BytesIO()
    gTTS(text=text, lang="ur").write_to_fp(mp3_buffer)
    return mp3_buffer.getvalue()


def _waveform_to_wav_bytes(waveform, sampling_rate) -> bytes:
    """Encode a float waveform as 16-bit mono PCM WAV bytes."""
    # Local imports keep this block self-contained; the file's import section
    # does not bring in ``wave`` or ``numpy``.
    import wave

    import numpy as np

    # Assumes mono audio with samples in [-1.0, 1.0]; a (1, N) array is
    # flattened to N samples -- TODO confirm for multi-channel checkpoints.
    samples = np.asarray(waveform, dtype=np.float32).reshape(-1)
    pcm = (np.clip(samples, -1.0, 1.0) * 32767.0).astype("<i2")

    wav_buffer = BytesIO()
    with wave.open(wav_buffer, "wb") as wav_file:
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)  # 2 bytes per sample == 16-bit PCM
        wav_file.setframerate(int(sampling_rate))
        wav_file.writeframes(pcm.tobytes())
    return wav_buffer.getvalue()
# Configure the Streamlit page: title and a short description.
st.title("Multilingual Text-to-Speech Application")
st.write("Generate speech from text in American English and Urdu with different voice styles.")

# Input widgets: the text to speak plus the voice-style and language options.
text = st.text_area("Enter your text:", "Hello, welcome to the Text-to-Speech app!")
# NOTE: the selected style is forwarded to generate_tts, which currently does
# not use it, so this choice has no audible effect yet.
voice_type = st.selectbox("Select voice style:", ["Adult", "Child", "Cartoon"])
language = st.selectbox("Select language:", ["English", "Urdu"])

if st.button("Generate Audio"):
    if not text.strip():
        # Fail fast with a friendly message instead of a backend error.
        st.warning("Please enter some text before generating audio.")
    else:
        with st.spinner("Generating audio..."):
            # Top-level UI boundary: surface any backend failure to the user
            # rather than crashing the app.
            try:
                audio_data = generate_tts(text, voice_type, language)
                if language == "English":
                    # Inline audio player for the generated WAV data.
                    st.audio(audio_data, format="audio/wav")
                elif language == "Urdu":
                    # Play the MP3 inline and offer it for download through
                    # Streamlit's own widget rather than a base64 data: link
                    # injected as raw HTML.
                    st.audio(audio_data, format="audio/mp3")
                    st.download_button(
                        label="Download Urdu Audio",
                        data=audio_data,
                        file_name="output.mp3",
                        mime="audio/mpeg",
                    )
            except Exception as e:
                st.error(f"Error: {e}")

st.caption("Powered by Hugging Face Transformers and Google TTS")