import gradio as gr
from audiocraft.data.audio_utils import normalize_audio
from audiocraft.models import MusicGen
from audiotools import AudioSignal

from pyharp import ModelCard, build_endpoint, save_and_return_filepath

card = ModelCard(
    name='Micro Musicgen Jungle',
    description="The jungle version of the micro-musicgen model series. Use a prompt duration of 0 to generate unconditional audio, which works better. Outpainting is not really tested by me; I just thought it would be cool to have here, since you are working with input audio anyway.\n\nHave fun!",
    author='Aaron Abebe',
    tags=['musicgen', 'jungle', 'micro-musicgen', 'unconditional', 'generation']
)
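
# Load the pretrained micro-musicgen-jungle weights; get_pretrained fetches the
# checkpoint from the Hugging Face Hub on first use.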
model = MusicGen.get_pretrained("pharoAIsanders420/micro-musicgen-jungle")


def process_fn(input_audio_path, gen_duration, prompt_duration):
    """
    Process the input audio and generate new audio by sampling from the
    micro-musicgen-jungle model. Supports both unconditional and conditional
    generation.

    Args:
        input_audio_path (str): the filepath of the audio to be processed.
        gen_duration (int): the duration of the generated audio, in seconds.
        prompt_duration (int): the duration of the conditioning prompt taken
            from the end of the input audio, in seconds; 0 disables conditioning.

    Returns:
        output_audio_path (str): the filepath of the processed audio.
    """
    # Fixed sampling settings; only the generation duration is user-facing.
    model.set_generation_params(
        duration=gen_duration,
        temperature=1.05,
        cfg_coef=3,
    )

    if prompt_duration is None or prompt_duration == 0:
        # Unconditional generation: the input audio is ignored.
        output = model.generate_unconditional(1)
    else:
        # Outpainting: condition on the last `prompt_duration` seconds of the
        # input. The input is loaded lazily here so that unconditional runs
        # work even when no audio was provided.
        sig = AudioSignal(input_audio_path)
        y, sr = sig.audio_data[0], sig.sample_rate

        num_samples = int(prompt_duration * sr)
        if y.shape[1] < num_samples:
            raise ValueError("The existing audio is too short for the specified prompt duration.")

        start_sample = y.shape[1] - num_samples
        prompt_waveform = y[..., start_sample:]

        # generate_continuation resamples the prompt to the model's sample rate.
        output = model.generate_continuation(prompt_waveform, prompt_sample_rate=sr)

    # Loudness-normalize the generated audio before writing it to disk.
    output = normalize_audio(
        output,
        sample_rate=model.sample_rate,
        strategy="loudness",
        loudness_headroom_db=10,
        loudness_compressor=True,
    )

    # MusicGen generates at its own sample rate, which may differ from the
    # input's, so wrap the output in a fresh AudioSignal at model.sample_rate.
    sig = AudioSignal(output.cpu(), sample_rate=model.sample_rate)
    return save_and_return_filepath(sig)
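
# Quick local smoke test (hypothetical file path; bypasses the HARP/gradio UI):
#   process_fn("input.wav", gen_duration=10, prompt_duration=0)  # unconditional
#   process_fn("input.wav", gen_duration=10, prompt_duration=2)  # outpainting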


with gr.Blocks() as demo:
    inputs = [
        gr.Audio(
            label="Input Audio (only used when Input Conditioning Duration > 0)",
            type='filepath'
        ),
        gr.Slider(
            minimum=10,
            maximum=30,
            step=1,
            value=10,
            label="Generation Duration (s)"
        ),
        gr.Slider(
            minimum=0,
            maximum=10,
            step=1,
            value=2,
            label="Input Conditioning Duration (s)"
        ),
    ]
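
    # These inputs map positionally onto process_fn's parameters:
    # (input_audio_path, gen_duration, prompt_duration).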

    output = gr.Audio(label='Audio Output', type='filepath')
    widgets = build_endpoint(inputs, output, process_fn, card)
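
# share=True exposes a public gradio link; the HARP plugin connects to this
# endpoint via that URL (assumption: following the pyharp example setup).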
demo.queue()
demo.launch(share=True)