File size: 4,218 Bytes
12d9140
 
 
 
92316b0
137f837
6cecadb
12d9140
 
6cecadb
12d9140
 
6cecadb
 
 
 
12d9140
 
 
 
6cecadb
 
12d9140
 
6cecadb
 
 
 
 
12d9140
 
 
6cecadb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d138cc3
137f837
 
 
 
6cecadb
137f837
 
 
 
 
 
 
 
 
 
 
d138cc3
 
 
137f837
 
 
6cecadb
12d9140
 
 
 
 
 
 
 
 
 
6cecadb
137f837
 
 
 
 
 
 
 
92316b0
 
6cecadb
12d9140
 
6cecadb
 
 
 
 
 
 
 
 
d138cc3
137f837
6cecadb
137f837
92316b0
12d9140
868c283
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import os
import streamlit as st
from dotenv import load_dotenv
import base64
import requests
import time

# Load environment variables from the .env file
load_dotenv()

# Set your Hugging Face API token from the environment variable
HUGGINGFACE_API_TOKEN = os.getenv("HUGGINGFACE_API_TOKEN")

# Translation API URL (using LibreTranslate as an example)
TRANSLATION_API_URL = "https://libretranslate.com/translate"

# Function to get user input
def get_text():
    """Render the speech-prompt text box and return its current contents."""
    return st.text_input("Enter text for speech generation:", key="input")

# Function to select the language model and target language for translation
def select_language():
    """Show a language drop-down and return the code mapped to the chosen name."""
    # Display name -> language code; insertion order is the order shown in the UI.
    codes_by_name = dict(
        English="en",
        Spanish="es",
        French="fr",
        German="de",
        Italian="it",
    )
    chosen_name = st.selectbox("Select Language", list(codes_by_name))
    return codes_by_name[chosen_name]

# Function to translate text to English (using LibreTranslate API as an example)
def translate_text(text, target_language, timeout=15):
    """Translate ``text`` into English through the translation endpoint.

    Args:
        text: The text to translate.
        target_language: Language code of ``text``. Despite the name, it is
            sent as the request's ``source``; the request's ``target`` is
            always ``"en"``.
        timeout: Seconds to wait for the translation service before giving up.

    Returns:
        The translated text. ``text`` is returned unchanged when it is empty,
        when the language is already ``"en"``, or when the request fails in
        any way (an error is shown in the Streamlit UI in the failure cases).
    """
    # Nothing to translate: skip the network round-trip entirely.
    if not text or target_language == "en":
        return text

    payload = {
        "q": text,
        "source": target_language,
        "target": "en",
    }
    try:
        response = requests.post(TRANSLATION_API_URL, json=payload, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors and timeouts would otherwise crash the app.
        st.error(f"Error: Could not reach the translation service ({exc}).")
        return text

    if response.status_code != 200:
        st.error(f"Error: {response.status_code} - Could not translate text.")
        return text

    try:
        translated = response.json().get("translatedText", "")
    except (ValueError, AttributeError):
        # Body was not JSON, or the JSON was not an object.
        translated = ""

    if not translated:
        st.error("Error: Translation service returned an unexpected response.")
        return text
    return translated

# Function to send a request to the Hugging Face API with retry on 503 and error handling for 500
def generate_speech(model_id, text, max_retries=5, timeout=60):
    """Request synthesized speech for ``text`` from the Hugging Face inference API.

    A 503 response is treated as "model still loading": the function waits for
    the ``estimated_time`` reported in the response (10 seconds if absent or
    unusable) and retries, up to ``max_retries`` times.

    Args:
        model_id: Model identifier appended to the inference API URL.
        text: Text sent as the ``inputs`` field of the request.
        max_retries: Maximum number of retries after a 503 response.
        timeout: Seconds to wait for each HTTP request.

    Returns:
        The raw response body (audio bytes) on HTTP 200, otherwise ``None``
        after an error has been shown in the Streamlit UI.
    """
    url = f"https://api-inference.huggingface.co/models/{model_id}"
    headers = {"Authorization": f"Bearer {HUGGINGFACE_API_TOKEN}"}
    payload = {"inputs": text}
    default_wait = 10

    for attempt in range(max_retries + 1):
        try:
            # No stream=True: the whole body is consumed via .content anyway.
            response = requests.post(
                url, headers=headers, json=payload, timeout=timeout
            )
        except requests.RequestException as exc:
            st.error(f"Error: Could not reach the speech service ({exc}).")
            return None

        status = response.status_code
        if status == 200:
            return response.content  # Audio data

        if status == 503:
            if attempt == max_retries:
                break  # Out of retries; do not sleep for nothing.
            try:
                estimated_time = response.json().get("estimated_time", default_wait)
            except (ValueError, AttributeError):
                # 503 body was not a JSON object.
                estimated_time = default_wait
            # time.sleep rejects non-numeric and negative values.
            if not isinstance(estimated_time, (int, float)) or estimated_time < 0:
                estimated_time = default_wait
            st.info(f"Model is loading, please wait {estimated_time} seconds...")
            time.sleep(estimated_time)
            continue

        if status == 500:
            st.error("Internal Server Error: The model encountered an issue. Please try again later.")
        else:
            st.error(f"Error: {status} - {response.text}")
        return None

    st.error("Error: 503 - The model is still loading. Please try again later.")
    return None

# Function to play audio in Streamlit
def play_audio(audio_bytes):
    """Embed an autoplaying HTML audio player for ``audio_bytes`` in the page.

    The bytes are base64-encoded into a ``data:audio/wav`` URI, and the
    resulting markup is rendered via ``st.markdown`` with raw HTML enabled.
    """
    encoded = base64.b64encode(audio_bytes)
    b64_text = encoded.decode()
    player_markup = f"""
<audio controls autoplay>
<source src="data:audio/wav;base64,{b64_text}" type="audio/wav">
        Your browser does not support the audio element.
</audio>
    """
    st.markdown(player_markup, unsafe_allow_html=True)

# Initialize Streamlit UI
# page_icon is the loudspeaker emoji (U+1F50A).
st.set_page_config(page_title="Multilingual Text-to-Speech", page_icon="🔊")
st.header("Multilingual Text-to-Speech Demo")

# Get user input and language selection
user_input = get_text()
selected_language = select_language()

# Create a button for generating speech
submit = st.button('Generate Speech')

# Whitespace-only input counts as empty: there is nothing to speak.
has_text = bool(user_input and user_input.strip())

# If the generate button is clicked and user input is not empty
if submit and has_text:
    with st.spinner("Translating and generating speech..."):
        # Translate the text to English if needed
        translated_text = translate_text(user_input, selected_language)

        # Display both original and translated text
        if selected_language != "en":
            st.write(f"Original Text: {user_input}")
            st.write(f"Translated Text: {translated_text}")

        if translated_text:
            # Generate speech with retry if the model is loading and handle 500 errors
            audio_data = generate_speech("myshell-ai/MeloTTS-English", translated_text)

            if audio_data:
                play_audio(audio_data)
        else:
            # Do not send an empty prompt to the speech model.
            st.warning("Translation returned no text, so no speech was generated.")
elif submit:
    st.warning("Please enter some text.")  # Warning for empty input