"""Streamlit text-to-speech demo built on the Hugging Face SpeechT5 pipeline."""

import io
import pickle
import wave

import numpy as np
import requests
import streamlit as st
import torch
from transformers import pipeline

# NOTE(review): confirm this file actually exists in the dataset repository;
# the URL is kept exactly as the script originally had it.
SPEAKER_EMBEDDINGS_URL = (
    "https://huggingface.co/datasets/Matthijs/cmu-arctic-xvectors/"
    "resolve/main/xvectors.npy"
)
# Without a timeout a stalled connection would hang the Streamlit run forever.
REQUEST_TIMEOUT_SECONDS = 30
# Row of the embeddings matrix used as the voice when the file holds several
# x-vectors; clamped to the last row if the file is shorter.
SPEAKER_INDEX = 0


# Caching the text-to-speech model
@st.cache_resource
def load_tts_pipeline():
    """Build the SpeechT5 text-to-speech pipeline (cached across reruns)."""
    return pipeline("text-to-speech", model="microsoft/speecht5_tts")


# Initialize the model once using cache
tts_pipe = load_tts_pipeline()

# Initialize session state for conversation history, bot response, and selected options
if 'conversation_history' not in st.session_state:
    st.session_state.conversation_history = ""
if 'tts_audio' not in st.session_state:
    st.session_state.tts_audio = None


class _SpeakerEmbeddingsError(Exception):
    """Carry a user-facing message describing why embeddings are unavailable."""


@st.cache_resource
def _load_speaker_embeddings():
    """Download and parse the speaker embeddings, raising on any failure.

    Failures are raised instead of returned so that only a successfully
    parsed array ends up in the cache; a transient network error is retried
    on the next call.

    Raises:
        _SpeakerEmbeddingsError: on network/HTTP errors, an unexpected
            content type, or an unparsable payload.
    """
    try:
        response = requests.get(
            SPEAKER_EMBEDDINGS_URL, timeout=REQUEST_TIMEOUT_SECONDS
        )
        response.raise_for_status()
    except requests.RequestException as exc:
        raise _SpeakerEmbeddingsError(
            f"Failed to download speaker embeddings: {exc}"
        ) from exc

    # .get() avoids a KeyError when the server omits the header entirely.
    content_type = response.headers.get('Content-Type', '')
    if 'application/octet-stream' not in content_type:
        raise _SpeakerEmbeddingsError("Unexpected file type.")

    try:
        with io.BytesIO(response.content) as buffer:
            # SECURITY: allow_pickle=True lets a crafted .npy execute
            # arbitrary code while loading. It is preserved from the original
            # script; switch to allow_pickle=False once the file is confirmed
            # to be a plain numeric array.
            return np.load(buffer, allow_pickle=True)
    except (ValueError, OSError, EOFError, pickle.UnpicklingError) as exc:
        raise _SpeakerEmbeddingsError(
            "Failed to load speaker embeddings."
        ) from exc


def get_speaker_embeddings():
    """Return the speaker embeddings array, or None after reporting an error.

    The download is cached, so repeated conversions do not hit the network
    again once it has succeeded.
    """
    try:
        return _load_speaker_embeddings()
    except _SpeakerEmbeddingsError as exc:
        st.error(str(exc))
        return None


def _as_speaker_tensor(speaker_embeddings):
    """Pick one x-vector and return it as a float32 tensor of shape (1, dim)."""
    vectors = np.asarray(speaker_embeddings, dtype=np.float32)
    if vectors.ndim == 1:
        vector = vectors
    else:
        # Treat the last axis as the embedding dimension and every leading
        # axis as "which speaker" -- presumably (num_speakers, dim); verify
        # against the downloaded file.
        vectors = vectors.reshape(-1, vectors.shape[-1])
        vector = vectors[min(SPEAKER_INDEX, len(vectors) - 1)]
    return torch.tensor(vector).unsqueeze(0)


def convert_text_to_speech(text):
    """Synthesize speech for ``text``.

    Returns:
        The pipeline output (a dict with ``audio`` and ``sampling_rate``), or
        None when the embeddings are unavailable or synthesis fails; the
        failure is reported through ``st.error``.
    """
    speaker_embeddings = get_speaker_embeddings()  # Obtain speaker embeddings
    if speaker_embeddings is None:
        return None
    try:
        speaker_tensor = _as_speaker_tensor(speaker_embeddings)
        # The pipeline forwards model inputs only through ``forward_params``;
        # a bare ``speaker_embeddings=`` keyword is rejected.
        return tts_pipe(
            text, forward_params={"speaker_embeddings": speaker_tensor}
        )
    except Exception as e:  # UI boundary: surface any model error to the user
        st.error(f"Error generating speech: {e}")
        return None


def convert_audio_to_bytes(audio):
    """Encode pipeline output as an in-memory 16-bit PCM mono WAV file.

    Args:
        audio: dict with ``audio`` (float samples, expected in [-1, 1]) and
            ``sampling_rate`` (Hz), as returned by the TTS pipeline.

    Returns:
        io.BytesIO positioned at the start of a complete WAV file.
    """
    # Drop singleton axes so a (1, n) waveform is written as n mono frames.
    samples = np.squeeze(np.asarray(audio['audio'], dtype=np.float32))
    # Clip before scaling so out-of-range samples saturate instead of wrapping.
    pcm = (np.clip(samples, -1.0, 1.0) * 32767.0).astype('<i2')

    audio_buffer = io.BytesIO()
    # wave does not close a file object it did not open, so the buffer
    # remains usable after the ``with`` block.
    with wave.open(audio_buffer, 'wb') as wav_file:
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)
        wav_file.setframerate(int(audio['sampling_rate']))
        wav_file.writeframes(pcm.tobytes())
    audio_buffer.seek(0)
    return audio_buffer


# Sidebar options
st.sidebar.title("App Settings")
feature = st.sidebar.radio("Choose Feature:", ("Text-to-Speech", "Other Options"))

if feature == "Text-to-Speech":
    st.title("🗣 Text-to-Speech Converter")
    st.subheader("Convert your text to speech using a TTS model!")
    user_message = st.text_area("Enter text to convert to speech:")
    if st.button("Convert"):
        # Whitespace-only input has nothing to synthesize.
        if user_message.strip():
            tts_audio = convert_text_to_speech(user_message)
            if tts_audio:
                audio_bytes = convert_audio_to_bytes(tts_audio)
                st.audio(audio_bytes, format='audio/wav')
                st.success("Conversion successful!")
            else:
                st.error("Conversion failed.")
        else:
            st.warning("Please enter text before converting.")

st.markdown("---")
st.markdown("### About this App")
st.info("This app uses a text-to-speech model from the Hugging Face Transformers library. Enter text to hear it spoken out loud.")
st.sidebar.markdown("---")
st.sidebar.write("Created by [Your Name](https://github.com/yourprofile)")