"""Streamlit app: type a sentence, click a button, hear it spoken via Suno Bark."""

import os

# These MUST be set before `bark` is imported: the bark package reads them at
# import time to enable CPU offloading and the small model variants.
os.environ["SUNO_OFFLOAD_CPU"] = "True"
os.environ["SUNO_USE_SMALL_MODELS"] = "True"

import scipy.io.wavfile  # explicit submodule import; `import scipy` alone does not expose it
import streamlit as st
from bark import SAMPLE_RATE, generate_audio
from transformers import AutoProcessor, BarkModel

# Single place for the generated file's path (was hard-coded in three spots).
AUDIO_PATH = "audio.wav"

print("Loading models")
processor = AutoProcessor.from_pretrained("suno/bark")
model = BarkModel.from_pretrained("suno/bark")

st.write('Welcome to Text to Audio generator app')


def generate_audio_from_text(message):
    """Synthesize `message` with the `bark` package API and write it to AUDIO_PATH.

    Kept for parity with the original script; the UI path uses generate_audio_2.
    """
    audio_array = generate_audio(message)
    scipy.io.wavfile.write(AUDIO_PATH, rate=SAMPLE_RATE, data=audio_array)


def generate_audio_2(message):
    """Synthesize `message` with the transformers BarkModel and write it to AUDIO_PATH."""
    voice_preset = "v2/en_speaker_6"
    inputs = processor(message, voice_preset=voice_preset)
    audio_array = model.generate(**inputs)
    # Model output is a (1, samples) tensor; squeeze to a 1-D numpy array for WAV output.
    audio_array = audio_array.numpy().squeeze()
    sample_rate = model.generation_config.sample_rate
    scipy.io.wavfile.write(AUDIO_PATH, rate=sample_rate, data=audio_array)


sentence = st.text_input('Input your sentence here:')

if 'clicked' not in st.session_state:
    st.session_state.clicked = False


def click_button():
    """Button callback: generate audio for the current sentence and play it."""
    st.session_state.clicked = True
    generate_audio_2(sentence)
    # Context manager closes the handle (the original leaked an open file).
    with open(AUDIO_PATH, 'rb') as audio_file:
        audio_bytes = audio_file.read()
    st.audio(audio_bytes, format='audio/wav')


st.button('Click me', on_click=click_button)