"""Streamlit app that generates audio from a text prompt with MusicGen."""

import re
from pathlib import Path

import scipy
import scipy.io.wavfile  # `import scipy` alone does not guarantee the submodule is loaded
import streamlit as st
from transformers import AutoProcessor, MusicgenForConditionalGeneration

BASE_MODEL = "facebook/musicgen-small"

# Cap on the prompt-derived part of the file name so that long prompts stay
# well below common file-system name limits.
_MAX_STEM_LENGTH = 60


@st.cache_resource
def _load_model():
    """Load the processor and model for ``BASE_MODEL``.

    Streamlit re-executes this script on every widget interaction; caching
    the loaded objects avoids reloading the weights on each rerun.

    Returns:
        A ``(processor, model)`` tuple.
    """
    return (
        AutoProcessor.from_pretrained(BASE_MODEL),
        MusicgenForConditionalGeneration.from_pretrained(BASE_MODEL),
    )


processor, model = _load_model()


def _safe_stem(text):
    """Return a file-name-safe stem derived from *text*.

    Every run of characters other than word characters and ``-`` is
    collapsed to a single ``_`` so that path separators and other special
    characters in the prompt cannot end up in the file name. Falls back to
    ``"audio"`` when nothing usable remains.
    """
    stem = re.sub(r"[^\w-]+", "_", text.strip()).strip("_")
    return stem[:_MAX_STEM_LENGTH] or "audio"


def generate_audio(text):
    """Generate audio for *text* and save it as a WAV file.

    Args:
        text: Prompt describing the audio to generate.

    Returns:
        The name of the WAV file that was written, relative to the current
        working directory. The name is derived from the prompt.
    """
    inputs = processor(
        text=[text],
        padding=True,
        return_tensors="pt",
    )
    audio_values = model.generate(
        **inputs, do_sample=True, guidance_scale=3, max_new_tokens=256
    )
    sampling_rate = model.config.audio_encoder.sampling_rate

    output_path = f"{_safe_stem(text)}.wav"
    # [0, 0] selects a single waveform from the generated tensor
    # (presumably first batch item, first channel -- confirm against the
    # MusicGen documentation).
    scipy.io.wavfile.write(
        output_path, rate=sampling_rate, data=audio_values[0, 0].numpy()
    )
    return output_path


# Streamlit app title and description
st.title("Text-to-Audio Generation")

# User input for text prompt
text = st.text_area("Enter Text Prompt:")

# Generate audio when the user clicks the button
if st.button("Generate Audio"):
    prompt = text.strip()  # a whitespace-only prompt counts as empty
    if prompt:
        audio_file_path = generate_audio(prompt)

        # Display the audio player
        audio_data = Path(audio_file_path).read_bytes()
        st.audio(audio_data, format="audio/wav")

        # Offer the file through a download button rather than a markdown
        # link to a local path.
        st.write("Audio Generated Successfully!")
        st.download_button(
            "Download the audio file",
            data=audio_data,
            file_name=Path(audio_file_path).name,
            mime="audio/wav",
        )
    else:
        st.warning("Please enter a text prompt.")