File size: 2,798 Bytes
eb4bca2
 
64e5450
eb4bca2
64e5450
eb4bca2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64e5450
 
 
456fa97
 
 
 
 
 
 
 
 
 
 
 
64e5450
 
eb4bca2
64e5450
456fa97
 
 
 
 
 
 
 
eb4bca2
64e5450
 
 
eb4bca2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
456fa97
 
 
 
 
 
eb4bca2
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import streamlit as st
from transformers import pipeline
import io
import numpy as np
import requests

# Build the text-to-speech pipeline behind st.cache_resource so the model
# object is created by this function only when the cache has no entry for it.
@st.cache_resource
def load_tts_pipeline():
    """Return a Transformers ``text-to-speech`` pipeline for SpeechT5.

    Returns:
        The pipeline object created for ``microsoft/speecht5_tts``.
    """
    tts = pipeline(task="text-to-speech", model="microsoft/speecht5_tts")
    return tts

# Module-level pipeline handle; load_tts_pipeline() is cache-decorated.
tts_pipe = load_tts_pipeline()

# Seed the session-state keys this script relies on, leaving any value that
# already exists in the current session untouched.
if "conversation_history" not in st.session_state:
    st.session_state["conversation_history"] = ""
if "tts_audio" not in st.session_state:
    st.session_state["tts_audio"] = None

def get_speaker_embeddings():
    """Download the speaker x-vector file and load it as a NumPy array.

    Every failure is reported to the user through ``st.error`` instead of
    propagating, so callers only need to check for ``None``.

    Returns:
        The array stored in the downloaded ``.npy`` payload, or ``None`` when
        the request fails, the server answers with an unexpected content
        type, or the payload cannot be parsed.
    """
    url = "https://huggingface.co/datasets/Matthijs/cmu-arctic-xvectors/resolve/main/xvectors.npy"

    try:
        # A timeout keeps a stalled connection from freezing the app, and
        # raise_for_status() turns 4xx/5xx answers into a handled error.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except requests.RequestException:
        st.error("Failed to download speaker embeddings.")
        return None

    # .get() avoids a KeyError when the server omits the header entirely.
    content_type = response.headers.get('Content-Type', '')
    if 'application/octet-stream' not in content_type:
        st.error("Unexpected file type.")
        return None

    with io.BytesIO(response.content) as buffer:
        try:
            # NOTE(security): allow_pickle=True lets np.load unpickle objects
            # from a remote payload, which can execute arbitrary code if the
            # file is ever tampered with. Kept for compatibility; switch to
            # allow_pickle=False once the file is confirmed to be a plain
            # numeric array.
            return np.load(buffer, allow_pickle=True)
        except (ValueError, OSError, EOFError):
            st.error("Failed to load speaker embeddings.")
            return None

def convert_text_to_speech(text):
    """Synthesize speech for ``text`` with the module-level TTS pipeline.

    Args:
        text: The text to speak.

    Returns:
        Whatever the pipeline returns for the request (the caller reads its
        ``'audio'`` entry), or ``None`` when the speaker embeddings are
        unavailable or synthesis fails. Failures are shown via ``st.error``.
    """
    speaker_embeddings = get_speaker_embeddings()  # Obtain speaker embeddings
    if speaker_embeddings is None:
        return None
    try:
        # Local import: torch is only needed here to build the tensor the
        # model expects, and an ImportError is reported like any other
        # synthesis failure.
        import torch

        xvectors = np.asarray(speaker_embeddings, dtype=np.float32)
        # The model is conditioned on ONE x-vector shaped (1, dim). If the
        # loaded array holds several vectors, use the first one.
        # NOTE(review): the choice of row 0 is arbitrary — confirm which
        # speaker is wanted.
        if xvectors.ndim > 1:
            xvectors = xvectors.reshape(-1, xvectors.shape[-1])[0]
        speaker_embedding = torch.tensor(xvectors).unsqueeze(0)

        # Model-specific inputs must go through ``forward_params``; the
        # pipeline call does not accept ``speaker_embeddings`` as a direct
        # keyword argument.
        audio = tts_pipe(
            text,
            forward_params={"speaker_embeddings": speaker_embedding},
        )
        return audio
    except Exception as e:
        st.error(f"Error generating speech: {e}")
        return None

def convert_audio_to_bytes(audio):
    """Encode pipeline audio output as an in-memory WAV file.

    The caller plays the result as ``audio/wav``, so the samples are wrapped
    in a real RIFF/WAVE container (mono, 16-bit PCM) rather than being
    exposed as a raw sample buffer.

    Args:
        audio: Mapping with an ``'audio'`` entry holding the waveform and,
            optionally, a ``'sampling_rate'`` entry in Hz. If ``'audio'`` is
            already bytes-like it is passed through unchanged.

    Returns:
        An ``io.BytesIO`` positioned at offset 0.
    """
    import wave  # stdlib; imported locally so this block is self-contained

    samples = audio['audio']
    if isinstance(samples, (bytes, bytearray, memoryview)):
        # Already-encoded data: keep the original pass-through behavior.
        return io.BytesIO(samples)

    # Flatten to a single mono channel of floats.
    # NOTE(review): assumes the waveform is mono with values in [-1.0, 1.0].
    waveform = np.asarray(samples, dtype=np.float32).ravel()
    # Clip before scaling so out-of-range samples saturate instead of
    # wrapping around when cast to int16.
    pcm16 = (np.clip(waveform, -1.0, 1.0) * 32767.0).astype('<i2')

    # Fall back to 16 kHz when the mapping carries no rate.
    # NOTE(review): 16 kHz is presumed to match the model in use — confirm.
    sampling_rate = int(audio.get('sampling_rate', 16000))

    audio_buffer = io.BytesIO()
    with wave.open(audio_buffer, 'wb') as wav_file:
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)  # 2 bytes per sample == 16-bit PCM
        wav_file.setframerate(sampling_rate)
        wav_file.writeframes(pcm16.tobytes())
    audio_buffer.seek(0)
    return audio_buffer

# Sidebar options: the radio button selects which page body is rendered.
st.sidebar.title("App Settings")

feature = st.sidebar.radio("Choose Feature:", ("Text-to-Speech", "Other Options"))

if feature == "Text-to-Speech":
    st.title("🗣 Text-to-Speech Converter")
    st.subheader("Convert your text to speech using a TTS model!")

    user_message = st.text_area("Enter text to convert to speech:")

    if st.button("Convert"):
        # Strip first so whitespace-only input is treated as empty instead
        # of being sent to the model.
        text_to_speak = user_message.strip()
        if text_to_speak:
            tts_audio = convert_text_to_speech(text_to_speak)
            if tts_audio:
                audio_bytes = convert_audio_to_bytes(tts_audio)
                st.audio(audio_bytes, format='audio/wav')
                st.success("Conversion successful!")
            else:
                # convert_text_to_speech returned None (it reports the
                # specific cause itself via st.error).
                st.error("Conversion failed.")
        else:
            st.warning("Please enter text before converting.")

# Main-page footer: a divider, a heading, and a short description of the app.
st.markdown("---")
st.markdown("### About this App")
st.info(
    "This app uses a text-to-speech model from the Hugging Face Transformers library. "
    "Enter text to hear it spoken out loud."
)

# Sidebar footer: a divider followed by the author credit.
with st.sidebar:
    st.markdown("---")
    st.write("Created by [Your Name](https://github.com/yourprofile)")