Spaces:
Runtime error
Runtime error
import os | |
import torch | |
import torchaudio | |
import numpy as np | |
import gradio as gr | |
from huggingface_hub import hf_hub_download | |
# Download both AudioMAE weight files from the Hub into the working directory.
# After the loop ``model_path`` holds the local path of the last file
# ("pytorch_model.bin").
for _weights_file in ("pretrained.pth", "pytorch_model.bin"):
    model_path = hf_hub_download(
        repo_id="DennisHung/Pre-trained_AudioMAE_weights",
        filename=_weights_file,
        local_dir="./",
        local_dir_use_symlinks=False,
    )

# NOTE(review): this import deliberately stays below the downloads, as in the
# original ordering -- presumably the pipeline module expects the weight files
# to be on disk already; confirm before hoisting it to the top of the file.
from pipeline.morph_pipeline_successed_ver1 import AudioLDM2MorphPipeline

# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Initialize the AudioLDM2 morphing pipeline in float32 and move it to the
# selected device.
pipeline = AudioLDM2MorphPipeline.from_pretrained(
    "cvssp/audioldm2-large",
    torch_dtype=torch.float32,
)
pipeline.to(device)
# Audio morphing function | |
def morph_audio(audio_file1, audio_file2, prompt1, prompt2, negative_prompt1="Low quality", negative_prompt2="Low quality"):
    """Morph between two audio clips and return the generated WAV file paths.

    The module-level ``pipeline`` is invoked with both clips and their
    prompts. The ``output`` directory is passed to it as both
    ``save_lora_dir`` and ``output_path``; afterwards every ``.wav`` file
    found directly in that directory is returned.

    Args:
        audio_file1: Path of the first audio file. Its duration, truncated
            to whole seconds (minimum 1), is used as ``audio_length_in_s``.
        audio_file2: Path of the second audio file.
        prompt1: Text prompt for the first clip.
        prompt2: Text prompt for the second clip.
        negative_prompt1: Negative prompt for the first clip.
        negative_prompt2: Negative prompt for the second clip.

    Returns:
        A list of ``.wav`` paths inside ``output``, sorted by file name.
    """
    save_lora_dir = "output"
    os.makedirs(save_lora_dir, exist_ok=True)

    # The directory is reused across calls. Drop WAVs left behind by earlier
    # runs so the result below only contains files written by this call.
    for stale_name in os.listdir(save_lora_dir):
        if stale_name.endswith(".wav"):
            os.remove(os.path.join(save_lora_dir, stale_name))

    # Load the first clip only to derive the target length in seconds.
    waveform, sample_rate = torchaudio.load(audio_file1)
    # int() truncates, so a clip shorter than one second would request a
    # zero-length generation; clamp to at least one second.
    duration = max(1, int(waveform.shape[1] / sample_rate))

    # Perform morphing using the pipeline. Its return value is not used; the
    # results are picked up from the output directory afterwards.
    pipeline(
        audio_file=audio_file1,
        audio_file2=audio_file2,
        audio_length_in_s=duration,
        time_pooling=2,
        freq_pooling=2,
        prompt_1=prompt1,
        prompt_2=prompt2,
        negative_prompt_1=negative_prompt1,
        negative_prompt_2=negative_prompt2,
        save_lora_dir=save_lora_dir,
        use_adain=True,
        use_reschedule=False,
        num_inference_steps=50,
        lamd=0.6,
        output_path=save_lora_dir,
        num_frames=5,
        fix_lora=None,
        use_lora=True,
        lora_steps=50,
        noisy_latent_with_lora=True,
        morphing_with_lora=True,
        use_morph_prompt=True,
        guidance_scale=7.5,
    )

    # os.listdir() returns entries in arbitrary order; sort so the files reach
    # the caller in a stable, name-based order.
    return sorted(
        os.path.join(save_lora_dir, name)
        for name in os.listdir(save_lora_dir)
        if name.endswith(".wav")
    )
# Gradio interface function | |
def interface(audio1, audio2, prompt1, prompt2):
    """Gradio callback: morph two uploaded clips and return the result paths.

    Args:
        audio1: File path of the first uploaded clip, or ``None`` when
            nothing was uploaded.
        audio2: File path of the second uploaded clip, or ``None`` when
            nothing was uploaded.
        prompt1: Text prompt for the first clip.
        prompt2: Text prompt for the second clip.

    Returns:
        The list of WAV paths produced by ``morph_audio``.

    Raises:
        gr.Error: If either audio file is missing.
    """
    # Without this guard a missing upload reaches the audio loader as None and
    # fails with an opaque error; report it to the user in the UI instead.
    if not audio1 or not audio2:
        raise gr.Error("Please upload both audio files before morphing.")
    return morph_audio(audio1, audio2, prompt1, prompt2)
# Gradio Interface | |
# Two audio uploads (delivered to the callback as file paths) followed by one
# text prompt per clip; five audio players show the morphing results.
demo = gr.Interface(
    fn=interface,
    inputs=[
        *(gr.Audio(label=f"Upload Audio File {n}", type="filepath") for n in (1, 2)),
        # gr.Slider(4, 6, step=1, label="Octave 1"),
        *(gr.Textbox(label=f"Prompt for Audio File {n}") for n in (1, 2)),
    ],
    outputs=[gr.Audio(label=f"Morphing audio {n}") for n in range(1, 6)],
)

# Start the web UI only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()