"""Streamlit demo app for ElevenLabs text-to-speech.

Lets the user type a short text, pick a voice and a model, and plays back
the audio generated by the ElevenLabs API.
"""

import numpy as np
import streamlit as st
from elevenlabs import (
    UnauthenticatedRateLimitError,
    generate,
    set_api_key,
    voices,
)

# Maximum number of characters sent to the API per request.
MAX_CHARS = 250

# Shown whenever the unauthenticated free-tier quota is exhausted.
RATE_LIMIT_MESSAGE = (
    "Thanks for trying out ElevenLabs TTS! You've reached the free tier "
    "limit. Please provide an API key to continue."
)


def pad_buffer(audio: bytes) -> bytes:
    """Return *audio* padded with NUL bytes to a multiple of 2 bytes.

    Two bytes is the item size of ``np.int16``; a buffer with an even
    length can be reinterpreted as an int16 array without error.
    """
    element_size = np.dtype(np.int16).itemsize
    remainder = len(audio) % element_size
    if remainder:
        audio = audio + b"\0" * (element_size - remainder)
    return audio


def generate_voice(text: str, voice_name: str, model_name: str) -> bytes:
    """Synthesize speech for *text* and return the encoded audio bytes.

    Args:
        text: Text to speak; only the first ``MAX_CHARS`` characters are sent.
        voice_name: Name of the ElevenLabs voice to use.
        model_name: ElevenLabs model identifier.

    Returns:
        The audio returned by ``generate``, padded to an even byte length.

    Raises:
        UnauthenticatedRateLimitError: presumably raised by ``generate`` when
            the free-tier quota is exhausted (the caller handles it).
    """
    audio = generate(
        text[:MAX_CHARS],
        voice=voice_name,
        model=model_name,
    )
    # The previous int16 NumPy round-trip (frombuffer -> tobytes) only copied
    # the padded bytes, so the padded buffer is returned directly. The padding
    # itself is kept so the returned bytes are identical to before.
    return pad_buffer(audio)


@st.cache_data(ttl=3600)
def _list_voice_names() -> list:
    """Return the names of the available voices, cached across reruns.

    Streamlit re-executes the whole script on every widget interaction;
    caching avoids a rate-limited network call on each rerun.
    """
    return [voice.name for voice in voices()]


st.title("🎤 World's most advanced Text-to-Speech")

description = """
A demo of the world's most advanced TTS systems, made by [ElevenLabs](https://elevenlabs.io).
Eleven Monolingual is designed to generate highly realistic voices in English,
where Eleven Multilingual is a single model supporting multiple languages including
English, German, Polish, Spanish, Italian, French, Portuguese, and Hindi.
Sign up on [ElevenLabs](https://elevenlabs.io) to get fast access, long-form generation,
voice cloning, API keys, and more!
credit goes to "1little coder"
"""
st.markdown(description)

input_text = st.text_area(
    f"Input Text ({MAX_CHARS} characters max)",
    value="Hahaha OHH MY GOD! This is SOOO funny, I-I am Eleven a text-to-speech system!",
    max_chars=MAX_CHARS,
)

# Fetching the voice list is a network call; report failures in the UI and
# stop this run instead of surfacing a raw traceback.
try:
    voice_names = _list_voice_names()
except UnauthenticatedRateLimitError:
    st.error(RATE_LIMIT_MESSAGE)
    st.stop()
except Exception as e:  # top-level UI boundary: show the error to the user
    st.error(str(e))
    st.stop()

input_voice = st.selectbox(
    "Voice",
    options=voice_names,
    index=0,
)

input_model = st.radio(
    "Model",
    options=["eleven_monolingual_v1", "eleven_multilingual_v1"],
    index=0,
)

if st.button("Generate Voice"):
    if not input_text.strip():
        # Nothing to synthesize; avoid spending a request on empty input.
        st.warning("Please enter some text to generate speech.")
    else:
        try:
            audio = generate_voice(input_text, input_voice, input_model)
        except UnauthenticatedRateLimitError:
            st.error(RATE_LIMIT_MESSAGE)
        except Exception as e:  # top-level UI boundary: show the error
            st.error(str(e))
        else:
            # ElevenLabs returns MP3-encoded audio by default, so declare the
            # matching MIME type rather than "audio/wav".
            st.audio(audio, format="audio/mpeg")