import os
import base64
import time

import requests
import streamlit as st
from dotenv import load_dotenv

# Load environment variables from the .env file
load_dotenv()

# Hugging Face API token, read from the environment (None when unset)
HUGGINGFACE_API_TOKEN = os.getenv("HUGGINGFACE_API_TOKEN")

# Translation API URL (using LibreTranslate as an example)
TRANSLATION_API_URL = "https://libretranslate.com/translate"

# Network limits: without these a stalled connection or a model that never
# finishes loading would block the Streamlit script run indefinitely.
REQUEST_TIMEOUT_SECONDS = 60
MAX_MODEL_LOAD_RETRIES = 5
DEFAULT_RETRY_WAIT_SECONDS = 10
MAX_RETRY_WAIT_SECONDS = 60


def get_text():
    """Render the text input widget and return its current value."""
    return st.text_input("Enter text for speech generation:", key="input")


def select_language():
    """Render the language selector and return the chosen language code."""
    language_options = {
        "English": "en",
        "Spanish": "es",
        "French": "fr",
        "German": "de",
        "Italian": "it",
    }
    selected_language = st.selectbox(
        "Select Language", list(language_options.keys())
    )
    return language_options[selected_language]


def translate_text(text, target_language):
    """Translate *text* into English via the translation API.

    Args:
        text: The text entered by the user.
        target_language: Language code of *text*. Despite its name, this
            value is sent to the API as the ``source`` language; the
            translation target is always ``"en"``.

    Returns:
        The translated text, or the original *text* unchanged when the
        language is already English or the translation fails (an error is
        shown in the UI in the failure case).
    """
    if target_language == "en":
        return text  # Already English, nothing to translate

    payload = {
        "q": text,
        "source": target_language,
        "target": "en",
    }
    try:
        response = requests.post(
            TRANSLATION_API_URL, json=payload, timeout=REQUEST_TIMEOUT_SECONDS
        )
    except requests.RequestException as exc:
        st.error(f"Error: could not reach the translation service ({exc}).")
        return text

    if response.status_code != 200:
        st.error(f"Error: {response.status_code} - Could not translate text.")
        return text

    try:
        body = response.json()
    except ValueError:
        # A 200 response with a non-JSON body.
        body = None

    translated = body.get("translatedText", "") if isinstance(body, dict) else ""
    if not translated:
        st.error("Error: the translation service returned no translated text.")
        return text
    return translated


def _estimated_wait_seconds(response):
    """Return a bounded wait time parsed from a 503 "model loading" response."""
    try:
        estimate = float(
            response.json().get("estimated_time", DEFAULT_RETRY_WAIT_SECONDS)
        )
    except (ValueError, TypeError, AttributeError):
        # Body was not JSON, not an object, or the field was not numeric.
        return DEFAULT_RETRY_WAIT_SECONDS
    # `not (x > 0)` also rejects NaN, which time.sleep() would refuse.
    if not (estimate > 0):
        return DEFAULT_RETRY_WAIT_SECONDS
    return min(estimate, MAX_RETRY_WAIT_SECONDS)


def generate_speech(
    model_id,
    text,
    max_retries=MAX_MODEL_LOAD_RETRIES,
    timeout=REQUEST_TIMEOUT_SECONDS,
):
    """Request synthesized speech for *text* from the Hugging Face API.

    A 503 response is treated as "model still loading": the function waits
    for the server's estimated time (bounded) and retries, at most
    *max_retries* times.

    Args:
        model_id: Hugging Face model identifier used to build the URL.
        text: The text to synthesize.
        max_retries: Maximum number of retries after a 503 response.
        timeout: Per-request timeout in seconds.

    Returns:
        The raw response body (audio bytes) on success, or ``None`` after
        reporting the failure in the UI.
    """
    if not HUGGINGFACE_API_TOKEN:
        st.error(
            "HUGGINGFACE_API_TOKEN is not set. "
            "Add it to your environment or .env file."
        )
        return None

    url = f"https://api-inference.huggingface.co/models/{model_id}"
    headers = {"Authorization": f"Bearer {HUGGINGFACE_API_TOKEN}"}
    payload = {"inputs": text}

    for attempt in range(max_retries + 1):
        try:
            response = requests.post(
                url, headers=headers, json=payload, timeout=timeout
            )
        except requests.RequestException as exc:
            st.error(f"Error: could not reach the Hugging Face API ({exc}).")
            return None

        if response.status_code == 200:
            return response.content  # Audio data

        if response.status_code == 503:
            if attempt == max_retries:
                break  # Out of retries; report below
            wait_seconds = _estimated_wait_seconds(response)
            st.info(
                f"Model is loading, please wait {wait_seconds:.0f} seconds..."
            )
            time.sleep(wait_seconds)
            continue

        if response.status_code == 500:
            st.error(
                "Internal Server Error: The model encountered an issue. "
                "Please try again later."
            )
            return None

        st.error(f"Error: {response.status_code} - {response.text}")
        return None

    st.error(
        "The model is still loading after several attempts. "
        "Please try again later."
    )
    return None


def play_audio(audio_bytes):
    """Embed *audio_bytes* in the page as an HTML audio player."""
    b64 = base64.b64encode(audio_bytes).decode()
    # NOTE(review): the MIME type is assumed; the API's actual audio
    # container is not visible from this file - TODO confirm.
    audio_html = f"""
    <audio controls>
        <source src="data:audio/wav;base64,{b64}" type="audio/wav">
    </audio>
    """
    st.markdown(audio_html, unsafe_allow_html=True)


# Initialize Streamlit UI
st.set_page_config(page_title="Multilingual Text-to-Speech", page_icon="🔊")
st.header("Multilingual Text-to-Speech Demo")

# Get user input and language selection
user_input = get_text()
selected_language = select_language()

# Create a button for generating speech
submit = st.button('Generate Speech')

# Only proceed when the button was clicked and the input has real content
# (whitespace-only input is treated as empty).
if submit and user_input.strip():
    with st.spinner("Translating and generating speech..."):
        # Translate the text to English if needed
        translated_text = translate_text(user_input, selected_language)

        # Display both original and translated text
        if selected_language != "en":
            st.write(f"Original Text: {user_input}")
            st.write(f"Translated Text: {translated_text}")

        # Generate speech (retries while the model is loading)
        audio_data = generate_speech(
            "myshell-ai/MeloTTS-English", translated_text
        )
        if audio_data:
            play_audio(audio_data)
elif submit:
    st.warning("Please enter some text.")  # Warning for empty input