Spaces:
Runtime error
Runtime error
# Utils | |
import timeit | |
import soundfile as sf | |
# Streamlit | |
import streamlit as st | |
# Custom elements | |
from elements.component import ( | |
centered_text, | |
) | |
def generate_voice(
    input_text,
    sample_rate=22050,
):
    """Synthesize speech for ``input_text`` and render it in the Streamlit page.

    The text is tokenized and vocalized by the TTS object stored in
    ``st.session_state.TTS``. The result is written to a WAV file under
    ``cache_sound/`` (named after ``st.session_state.random_str``), played
    back with ``st.audio`` and followed by a block of timing statistics.

    Args:
        input_text: Text passed to ``st.session_state.TTS.tokenize``.
        sample_rate: Sample rate used both to write the WAV file and to
            convert the number of samples into seconds. Defaults to 22050,
            the value that was previously hard-coded in both places.

    Returns:
        None. All output goes to the Streamlit page and the cache file.
    """
    # Local import: only needed to make sure the cache directory exists.
    import os

    tts = st.session_state.TTS

    # TTS inference; tokenization and vocalization are timed separately.
    start_time = timeit.default_timer()
    # The third value returned by tokenize() is not used here.
    c, c_length, _phoneme = tts.tokenize(input_text)
    tok_time = timeit.default_timer() - start_time

    start_time = timeit.default_timer()
    voice = tts.vocalize(c, c_length)
    tts_time = timeit.default_timer() - start_time

    # Time stats
    total_infer_time = tts_time + tok_time
    # NOTE(review): assumes the last axis of `voice` is the sample axis and
    # that voice[0, 0] is a single waveform -- confirm against vocalize().
    audio_time = voice.shape[-1] / sample_rate
    # Guard both ratios so that empty audio (or a zero timer delta) is shown
    # as "inf" instead of aborting the page with ZeroDivisionError.
    rtf = total_infer_time / audio_time if audio_time > 0 else float("inf")
    rt_ratio = (
        audio_time / total_infer_time if total_infer_time > 0 else float("inf")
    )

    # Save audio (bug in Streamlit, can't play numpy array directly)
    os.makedirs("cache_sound", exist_ok=True)
    wav_path = f"cache_sound/{st.session_state.random_str}.wav"
    sf.write(wav_path, voice[0, 0], sample_rate)

    # Play audio
    st.audio(wav_path, format="audio/wav")
    st.caption("Generated Voice")

    # Fixed-point formatting: slicing str(float) breaks as soon as the repr
    # switches to scientific notation (e.g. "5.4e-05"[:6] -> "5.4e-0").
    st.code(
        f"💬 Output Audio: {audio_time:.4f} sec.\n\n"
        "⏳ Elapsed time for:\n"
        f" => Tokenization: {tok_time:.4f} sec.\n"
        f" => Model Inference: {tts_time:.4f} sec.\n\n"
        f"⏰ Real-time Factor (RTF): {rtf:.4f}\n\n"
        f"🏃 The model runs {rt_ratio:.4f} x faster than real-time ",
        language="bash",
    )
    st.caption("Elapsed Time Stats")