elevenlabs-stt / app.py
Amitesh007's picture
Update app.py
2c37aa9
import streamlit as st
import numpy as np
from elevenlabs import voices, generate, set_api_key, UnauthenticatedRateLimitError
def pad_buffer(audio):
    """Pad ``audio`` with zero bytes up to a whole number of int16 items.

    When the length of ``audio`` is already a multiple of the int16 item
    size, ``audio`` is returned as-is. Otherwise the missing trailing bytes
    are appended as zero bytes and the padded copy is returned.
    """
    sample_width = np.dtype(np.int16).itemsize
    leftover = len(audio) % sample_width
    if leftover == 0:
        # Already aligned: nothing to append.
        return audio
    missing = sample_width - leftover
    return audio + b'\0' * missing
def generate_voice(text, voice_name, model_name):
    """Generate speech for ``text`` and return the audio as a bytes object.

    Only the first 250 characters of ``text`` are passed to ``generate``,
    together with the selected voice and model names. The returned buffer is
    padded to an even byte count, viewed as int16 items, and serialized back
    to bytes.
    """
    clipped = text[:250]  # Limit to 250 characters
    raw = generate(clipped, voice=voice_name, model=model_name)
    # An int16 view needs an even-length buffer, hence the padding step.
    # NOTE(review): presumably ``generate`` returns a bytes-like payload here
    # -- confirm against the installed elevenlabs version.
    samples = np.frombuffer(pad_buffer(raw), dtype=np.int16)
    return samples.tobytes()
# --- Page header -----------------------------------------------------------
st.title("🎀 World's most advanced Text-to-Speech")
description = """
A demo of the world's most advanced TTS systems, made by [ElevenLabs](https://elevenlabs.io). Eleven Monolingual is designed to generate highly realistic voices in English, where Eleven Multilingual is a single model supporting multiple languages including English, German, Polish, Spanish, Italian, French, Portuguese, and Hindi. Sign up on [ElevenLabs](https://elevenlabs.io) to get fast access, long-form generation, voice cloning, API keys, and more!
credit goes to "1little coder"
"""
st.markdown(description)

# --- Input widgets ---------------------------------------------------------
input_text = st.text_area(
    "Input Text (250 characters max)",
    value="Hahaha OHH MY GOD! This is SOOO funny, I-I am Eleven a text-to-speech system!",
    max_chars=250
)

# Fetching the voice list runs on every rerun of the script; report a failure
# in the page and halt instead of letting the script die with a raw traceback.
try:
    all_voices = voices()
except Exception as e:
    st.error(str(e))
    st.stop()

input_voice = st.selectbox(
    "Voice",
    options=[voice.name for voice in all_voices],
    index=0
)
input_model = st.radio(
    "Model",
    options=["eleven_monolingual_v1", "eleven_multilingual_v1"],
    index=0
)

# --- Generation ------------------------------------------------------------
if st.button("Generate Voice"):
    if not input_text.strip():
        # Nothing to synthesize: skip the request rather than sending blank
        # text to the service.
        st.warning("Please enter some text to synthesize.")
    else:
        try:
            audio = generate_voice(input_text, input_voice, input_model)
            # The ElevenLabs text-to-speech endpoint returns MP3 by default,
            # so declare audio/mpeg rather than audio/wav.
            st.audio(audio, format='audio/mpeg')
        except UnauthenticatedRateLimitError:
            st.error("Thanks for trying out ElevenLabs TTS! You've reached the free tier limit. Please provide an API key to continue.")
        except Exception as e:
            # Top-level UI boundary: show any other failure to the user.
            st.error(str(e))