|
import streamlit as st |
|
import scipy |
|
|
|
from transformers import AutoProcessor, BarkModel |
|
import os |
|
os.environ["SUNO_OFFLOAD_CPU"] = "True" |
|
os.environ["SUNO_USE_SMALL_MODELS"] = "True" |
|
|
|
from bark import SAMPLE_RATE, generate_audio, preload_models |
|
from scipy.io.wavfile import write as write_wav |
|
|
|
from transformers import AutoProcessor, BarkModel |
|
|
|
|
|
@st.cache_resource
def _load_bark_components():
    """Load the Bark processor and model, cached across Streamlit reruns.

    Streamlit re-executes this script on every widget interaction. Without
    caching, both ``from_pretrained`` calls would run again on each rerun.
    ``st.cache_resource`` hands back the same objects on later runs.

    Returns:
        A ``(processor, model)`` tuple for the "suno/bark" checkpoint.
    """
    print("Loading models")
    bark_processor = AutoProcessor.from_pretrained("suno/bark")
    bark_model = BarkModel.from_pretrained("suno/bark")
    return bark_processor, bark_model


# Module-level names kept as-is: generate_audio_2 reads them as globals.
processor, model = _load_bark_components()

st.write('Welcome to Text to Audio generator app')
|
|
|
def generate_audio_from_text(message):
    """Render *message* to speech with the ``bark`` package.

    The waveform returned by ``generate_audio`` is saved to ``audio.wav``
    at ``SAMPLE_RATE``. Nothing is returned.
    """
    waveform = generate_audio(message)
    write_wav("audio.wav", SAMPLE_RATE, waveform)
|
|
|
def generate_audio_2(message, voice_preset="v2/en_speaker_6"):
    """Synthesize speech for *message* with the Transformers Bark model.

    Uses the module-level ``processor`` and ``model`` and writes the result
    to ``audio.wav``. Nothing is returned.

    Args:
        message: Text to convert to speech.
        voice_preset: Speaker preset forwarded to the processor. Defaults to
            "v2/en_speaker_6", the value that was previously hard-coded.
    """
    inputs = processor(message, voice_preset=voice_preset)

    generated = model.generate(**inputs)
    # Presumably a torch tensor: copy it to host memory before converting,
    # because Tensor.numpy() fails for tensors that live on an accelerator.
    samples = generated.cpu().numpy().squeeze()

    sample_rate = model.generation_config.sample_rate
    scipy.io.wavfile.write("audio.wav", rate=sample_rate, data=samples)
|
|
|
# Text the user wants converted to audio; read by click_button.
sentence = st.text_input('Input your sentence here:')

# Create the per-session "clicked" flag the first time the script runs.
if 'clicked' not in st.session_state:
    st.session_state['clicked'] = False
|
|
|
def click_button():
    """Button callback: generate audio for the current sentence and play it.

    Marks the session as clicked, synthesizes the module-level ``sentence``
    into ``audio.wav`` via ``generate_audio_2``, then embeds the file in an
    audio player.
    """
    st.session_state.clicked = True

    generate_audio_2(sentence)

    # Context manager so the file handle is closed after reading; the
    # original left it open on every click.
    with open('audio.wav', 'rb') as audio_file:
        audio_bytes = audio_file.read()

    st.audio(audio_bytes, format='audio/wav')
|
|
|
|
|
# Clicking the button invokes click_button as its on_click callback.
st.button(label='Click me', on_click=click_button)