# app.py — MusicGen song-starter demo (Hugging Face Space by artificialguybr)
# (Web-page residue from the HF file viewer removed; commented header retained
#  so the file is valid Python.)
import gradio as gr
import spaces
import torchaudio
from audiocraft.models import MusicGen
from audiocraft.data.audio import audio_write
# Imports required for model loading and audio handling
# Load the song-starter MusicGen checkpoint once at module import time,
# so every request reuses the same model instance.
model = MusicGen.get_pretrained('nateraw/musicgen-songstarter-v0.2')
model.set_generation_params(duration=8)  # Generate 8-second clips
@spaces.GPU(duration=120)  # Request a GPU slice for up to 120 s per call
def generate_audio(mode, descriptions=None, melody_path=None):
    """Generate music samples with MusicGen and save them as .wav files.

    Args:
        mode: one of 'unconditional', 'descriptions', 'melody'.
        descriptions: comma-separated prompt string; falls back to a
            default prompt when empty or None.
        melody_path: path to an audio file used as melody conditioning
            (required for 'melody' mode).

    Returns:
        List of paths to the generated .wav files, or an error string
        when a required input is missing or the mode is unknown.
    """
    # Gradio passes "" (not None) for a blank textbox, so test falsiness.
    # BUG FIX: the original fallback assigned a *list* and then called
    # .split(",") on it, which raised AttributeError.
    if not descriptions:
        descriptions = 'acoustic, guitar, melody, trap, d minor, 90 bpm'
    # Turn the comma-separated string into a list of trimmed prompts.
    descriptions = [d.strip() for d in descriptions.split(",")]

    if mode == 'unconditional':
        wav = model.generate_unconditional(4)
    elif mode == 'descriptions':
        wav = model.generate(descriptions * 3)  # 3 samples per prompt
    elif mode == 'melody':
        if melody_path is None:
            return "Melody path cannot be empty for melody mode."
        melody, sr = torchaudio.load(melody_path)
        # Broadcast the single loaded melody across 3 generated samples.
        wav = model.generate_with_chroma(
            descriptions, melody[None].expand(3, -1, -1), sr)
    else:
        # BUG FIX: an unknown mode previously left `wav` unbound and
        # crashed with UnboundLocalError below.
        return f"Unknown generation mode: {mode}"

    # Save the generated audio. audio_write appends '.wav' to the stem
    # itself, so pass the bare stem. (The original passed 'output_0.wav'
    # and produced 'output_0.wav.wav', which did not match the paths it
    # returned.)
    paths = []
    for idx, one_wav in enumerate(wav):
        audio_write(f'output_{idx}', one_wav.cpu(), model.sample_rate,
                    strategy="loudness", loudness_compressor=True)
        paths.append(f'output_{idx}.wav')
    return paths
# Gradio UI definition.
# BUG FIX: `optional=True` was removed in Gradio 3+, `gr.File(type="file")`
# is not a valid choice ("filepath" or "binary"), and `multiple=True` is not
# a gr.File kwarg — the correct spelling is `file_count="multiple"`. The
# original component constructors raised TypeError at startup.
iface = gr.Interface(
    fn=generate_audio,
    inputs=[
        gr.Dropdown(['unconditional', 'descriptions', 'melody'],
                    label="Generation Mode"),
        gr.Textbox(label="Descriptions (comma-separated, optional)"),
        gr.File(label="Melody File Path (.mp3, optional)", type="filepath"),
    ],
    # A gr.File output with file_count="multiple" accepts a list of paths.
    outputs=gr.File(label="Generated Audio", file_count="multiple"),
)
iface.launch()