import gradio as gr
from audiocraft.data.audio_utils import normalize_audio
from audiocraft.models import MusicGen
from audiotools import AudioSignal
from pyharp import ModelCard, build_endpoint, save_and_return_filepath
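
# Metadata that the HARP client displays when it connects to this space.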
card = ModelCard(
    name='Micro Musicgen Jungle',
    description="The jungle version of the micro-musicgen model series. Use a prompt duration of 0 to generate unconditional audio. (WHICH WORKS BETTER) Outpainting is not really tested by me, I just thought it would be cool to have it here because you work with input audio.\n\n HAVE FUNNNNNNNNN",
    author='Aaron Abebe',
    tags=['musicgen', 'jungle', 'micro-musicgen', 'unconditional', 'generation']
)
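
# Load the pretrained checkpoint once at startup so every request reuses the same model.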
model = MusicGen.get_pretrained("pharoAIsanders420/micro-musicgen-jungle")


def process_fn(input_audio_path, gen_duration, prompt_duration):
    """
    Process the input audio and generate new audio by sampling from the
    micro-musicgen-jungle model. Supports both unconditional and conditional
    generation.

    Args:
        input_audio_path (str): the audio filepath to be processed.
        gen_duration (int): the duration of the generated audio, in seconds.
        prompt_duration (int): the duration of the input conditioning audio, in seconds.

    Returns:
        output_audio_path (str): the filepath of the processed audio.
    """
    sig = AudioSignal(input_audio_path)
    y, sr = sig.audio_data[0], sig.sample_rate  # (channels, samples) tensor and its sample rate

    model.set_generation_params(
        duration=gen_duration,
        temperature=1.05,
        cfg_coef=3,
    )

    if prompt_duration is None or prompt_duration == 0:
        # No conditioning requested: sample a single clip unconditionally.
        output = model.generate_unconditional(1)
    else:
        # Condition on the last `prompt_duration` seconds of the input audio.
        num_samples = int(prompt_duration * sr)
        if y.shape[1] < num_samples:
            raise ValueError("The input audio is too short for the specified prompt duration.")
        start_sample = y.shape[1] - num_samples
        prompt_waveform = y[..., start_sample:]
        output = model.generate_continuation(prompt_waveform, prompt_sample_rate=sr)

    output = normalize_audio(
        output,
        sample_rate=model.sample_rate,  # musicgen outputs at 32kHz
        strategy="loudness",
        loudness_headroom_db=10,
        loudness_compressor=True,
    )

    # Wrap the result in a fresh AudioSignal at the model's sample rate; reusing
    # the input signal would keep the input's (possibly different) sample rate.
    sig = AudioSignal(output.cpu(), sample_rate=model.sample_rate)
    return save_and_return_filepath(sig)
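
# Optional local smoke test (hypothetical filepath; commented out because HARP
# normally calls process_fn through the Gradio endpoint built below):
# if __name__ == "__main__":
#     print(process_fn("example.wav", gen_duration=10, prompt_duration=0))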

with gr.Blocks() as demo:
    inputs = [
        gr.Audio(
            label="Ignore Me: I only generate, I don't consume",
            type='filepath'
        ),
        gr.Slider(
            minimum=10,
            maximum=30,
            step=1,
            value=10,
            label="Generation Duration (s)"
        ),
        gr.Slider(
            minimum=0,
            maximum=10,
            step=1,
            value=2,
            label="Input Conditioning Duration (s)"
        ),
    ]
    output = gr.Audio(label='Audio Output', type='filepath')

    # Wire the widgets, process_fn, and model card into a HARP-compatible endpoint.
    widgets = build_endpoint(inputs, output, process_fn, card)

demo.queue()
demo.launch(share=True)