import gradio as gr
from audiocraft.data.audio_utils import normalize_audio
from audiocraft.models import MusicGen
from audiotools import AudioSignal
from pyharp import ModelCard, build_endpoint, save_and_return_filepath

card = ModelCard(
    name='Micro Musicgen Jungle',
    description="The jungle version of the micro-musicgen model series. "
                "Use a prompt duration of 0 to generate unconditional audio. (WHICH WORKS BETTER) "
                "Outpainting is not really tested by me; I just thought it would be cool to have it here "
                "because you work with input audio.\n\nHAVE FUNNNNNNNNN",
    author='Aaron Abebe',
    tags=['musicgen', 'jungle', 'micro-musicgen', 'unconditional', 'generation']
)

# Load the model once at startup so every request reuses the same weights.
model = MusicGen.get_pretrained("pharoAIsanders420/micro-musicgen-jungle")


def process_fn(input_audio_path, gen_duration, prompt_duration):
    """
    Process the input audio and generate new audio by sampling from the
    micro-musicgen-jungle model. Supports both unconditional and
    conditional (continuation) generation.

    Args:
        input_audio_path (str): the audio filepath to be processed.
        gen_duration (int): the duration of the generated audio, in seconds.
        prompt_duration (int): the duration of the input conditioning audio, in seconds.

    Returns:
        output_audio_path (str): the filepath of the processed audio.
    """
    sig = AudioSignal(input_audio_path)
    y, sr = sig.audio_data[0], sig.sample_rate

    model.set_generation_params(
        duration=gen_duration,
        temperature=1.05,
        cfg_coef=3,
    )

    if prompt_duration is None or prompt_duration == 0:
        # Unconditional generation: the input audio is ignored entirely.
        output = model.generate_unconditional(1)
    else:
        # Conditional generation: continue from the last `prompt_duration`
        # seconds of the input audio.
        num_samples = int(prompt_duration * sr)
        if y.shape[1] < num_samples:
            raise ValueError("The existing audio is too short for the specified prompt duration.")
        start_sample = y.shape[1] - num_samples
        prompt_waveform = y[..., start_sample:]
        output = model.generate_continuation(prompt_waveform, prompt_sample_rate=sr)

    output = normalize_audio(
        output,
        sample_rate=model.sample_rate,  # musicgen outputs at 32kHz
        strategy="loudness",
        loudness_headroom_db=10,
        loudness_compressor=True,
    )

    # Write the generated audio back into the signal, and keep the model's
    # 32 kHz sample rate so the file is not saved at the input file's
    # (possibly different) rate and played back at the wrong speed.
    sig.audio_data = output.cpu()
    sig.sample_rate = model.sample_rate

    return save_and_return_filepath(sig)


with gr.Blocks() as demo:
    inputs = [
        gr.Audio(
            label="Ignore Me: I only generate, I don't consume",
            type='filepath'
        ),
        gr.Slider(
            minimum=10,
            maximum=30,
            step=1,
            value=10,
            label="Generation Duration"
        ),
        gr.Slider(
            minimum=0,
            maximum=10,
            step=1,
            value=2,
            label="Input Conditioning Duration"
        ),
    ]
    output = gr.Audio(label='Audio Output', type='filepath')

    # Wire up the HARP endpoint: inputs, output, processing function, and model card.
    widgets = build_endpoint(inputs, output, process_fn, card)

demo.queue()
demo.launch(share=True)