import sys

print(sys.path)
sys.path.append('/home/user/audio_ai/diffusers_harp/venv/src')

from pyharp import ModelCard, build_endpoint, save_and_return_filepath

from audiotools import AudioSignal
import scipy
import torch
import gradio as gr
from diffusers import AudioLDM2Pipeline
import subprocess as sp
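
# Optional bootstrap (commented out): installs pyharp and its dependencies when they are not already available.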
#harp_deps = [
#    "descript-audiotools"]
#
#try:
#    from pyharp import ModelCard, build_endpoint, save_and_return_filepath
#except ImportError:
#    print("Installing harp dependencies...")
#    sp.check_call(["pip", "install", *harp_deps])
#    sp.check_call(["pip", "install", "-e", "git+https://github.com/audacitorch/pyharp.git#egg=pyharp"])
#    sp.check_call(["pip", "install", "pydantic<2.0.0"])
#    from pyharp import ModelCard, build_endpoint, save_and_return_filepath

# Create a Model Card
card = ModelCard(
    name='Diffusers AudioLDM2 Generation',
    description='AudioLDM2 text-to-audio generation; operates on the region selected in the track. The generation is not conditioned on the selected audio; it simply replaces the audio in the source track.',
    author='Team Audio',
    tags=['AudioLDM', 'Diffusers', 'Generation']
)

# Load the model
repo_id = "cvssp/audioldm2"
pipe = AudioLDM2Pipeline.from_pretrained(repo_id, torch_dtype=torch.float16)
pipe = pipe.to("cuda")
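
# NOTE: float16 weights assume a CUDA device; on a CPU-only host, load with
# torch_dtype=torch.float32 and move the pipeline to "cpu" instead.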

def process_fn(input_audio_path, prompt, negative_prompt, seed, num_inference_steps, audio_length_in_s, num_waveforms_per_prompt):
    """
    This function defines the audio processing steps.

    Args:
        input_audio_path (str): the audio filepath to be processed.
        prompt (str): text prompt describing the audio to generate.
        negative_prompt (str): text describing qualities to steer the generation away from.
        seed (int): random seed for the torch generator.
        num_inference_steps (int): number of denoising steps.
        audio_length_in_s (float): duration of the generated audio in seconds.
        num_waveforms_per_prompt (int): number of waveforms generated per prompt.
        NOTE: These arguments correspond to, and must match the order of, the UI elements defined below.

    Returns:
        output_audio_path (str): the filepath of the processed audio.
    """
    # The selected region is loaded but not used for conditioning (see the model card).
    sig = AudioSignal(input_audio_path)
    outfile = "./output.wav"
    #prompt = "The sound of a hammer hitting a wooden surface."
    #negative_prompt = "Low quality."

    # Set the seed for the generator (slider values may arrive as floats, so cast to int).
    generator = torch.Generator("cuda").manual_seed(int(seed))

    audio = pipe(
        prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=int(num_inference_steps),
        audio_length_in_s=audio_length_in_s,
        num_waveforms_per_prompt=int(num_waveforms_per_prompt),
        generator=generator,
    ).audios

    # AudioLDM2 outputs 16 kHz audio; write the first generated waveform to disk.
    scipy.io.wavfile.write(outfile, rate=16000, data=audio[0])
    return outfile
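
# Example (hypothetical values) of invoking the processing function directly:
#   process_fn("input.wav", "A dog barking in the distance", "Low quality.", 0, 200, 5.0, 1)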

# Build the endpoint
with gr.Blocks() as webapp:
    # Define your Gradio interface
    inputs = [
        gr.Audio(
            label="Audio Input",
            type="filepath"
        ),
        gr.Text(
            label="Prompt",
            interactive=True
        ),
        gr.Text(
            label="Negative Prompt",
            interactive=True
        ),
        gr.Slider(
            label="Seed",
            minimum=0,
            maximum=65535,
            value=0,
            step=1
        ),
        gr.Slider(
            minimum=1, maximum=500,
            step=1, value=1,
            label="Inference Steps"
        ),
        gr.Slider(
            minimum=2.5, maximum=10.0,
            step=2.5, value=2.5,
            label="Duration (s)"
        ),
        gr.Slider(
            minimum=1, maximum=10,
            step=1, value=1,
            label="Waveforms Per Prompt"
        ),
    ]
    # make an output audio widget
    output = gr.Audio(label="Audio Output", type="filepath")

    # Build the endpoint
    ctrls_data, ctrls_button, process_button, cancel_button = build_endpoint(inputs, output, process_fn, card)

#webapp.queue()
webapp.launch(share=True)