CSH-1220
Update app.py
1834911
raw
history blame
3 kB
import os
import torch
import torchaudio
import numpy as np
import gradio as gr
from huggingface_hub import hf_hub_download
# Download both AudioMAE weight files from the Hub into the current directory
# at import time. After the loop, `model_path` holds the path returned for the
# last file ("pytorch_model.bin").
for weights_file in ("pretrained.pth", "pytorch_model.bin"):
    model_path = hf_hub_download(
        repo_id="DennisHung/Pre-trained_AudioMAE_weights",
        filename=weights_file,
        local_dir="./",
        local_dir_use_symlinks=False,
    )
del weights_file  # keep the module namespace the same as before
from pipeline.morph_pipeline_successed_ver1 import AudioLDM2MorphPipeline
# Choose the compute device: CUDA when torch reports it is available, else CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the AudioLDM2 morphing pipeline in float32 and move it to that device.
pipeline = AudioLDM2MorphPipeline.from_pretrained(
    "cvssp/audioldm2-large",
    torch_dtype=torch.float32,
)
pipeline.to(device)
# Audio morphing function
def morph_audio(audio_file1, audio_file2, prompt1, prompt2, negative_prompt1="Low quality", negative_prompt2="Low quality"):
    """Morph between two audio clips and return the resulting WAV paths.

    Runs the module-level ``pipeline`` on both files, pointing it at the
    ``output`` directory, and then lists the ``.wav`` files found there.

    Args:
        audio_file1: Path of the first clip. Its length, truncated to whole
            seconds, is passed to the pipeline as ``audio_length_in_s``.
        audio_file2: Path of the second clip.
        prompt1: Text prompt passed as ``prompt_1``.
        prompt2: Text prompt passed as ``prompt_2``.
        negative_prompt1: Negative prompt passed as ``negative_prompt_1``.
        negative_prompt2: Negative prompt passed as ``negative_prompt_2``.

    Returns:
        A sorted list of paths to every ``.wav`` file in the output directory.
    """
    save_lora_dir = "output"
    os.makedirs(save_lora_dir, exist_ok=True)

    # Load audio and compute duration in whole seconds (samples / sample rate).
    # NOTE(review): clips shorter than one second truncate to 0 -- confirm the
    # pipeline accepts that.
    waveform, sample_rate = torchaudio.load(audio_file1)
    duration = int(waveform.shape[1] / sample_rate)

    # Perform morphing using the pipeline. The return value is not needed
    # because the results are collected from the output directory below.
    pipeline(
        audio_file=audio_file1,
        audio_file2=audio_file2,
        audio_length_in_s=duration,
        time_pooling=2,
        freq_pooling=2,
        prompt_1=prompt1,
        prompt_2=prompt2,
        negative_prompt_1=negative_prompt1,
        negative_prompt_2=negative_prompt2,
        save_lora_dir=save_lora_dir,
        use_adain=True,
        use_reschedule=False,
        num_inference_steps=50,
        lamd=0.6,
        output_path=save_lora_dir,
        num_frames=5,
        fix_lora=None,
        use_lora=True,
        lora_steps=50,
        noisy_latent_with_lora=True,
        morphing_with_lora=True,
        use_morph_prompt=True,
        guidance_scale=7.5,
    )

    # Collect the output file paths. os.listdir() returns names in arbitrary
    # order, so sort them to hand the results back in a stable sequence.
    # NOTE(review): .wav files left in this directory by earlier calls are
    # returned too -- confirm the pipeline overwrites them on each run.
    return sorted(
        os.path.join(save_lora_dir, name)
        for name in os.listdir(save_lora_dir)
        if name.endswith(".wav")
    )
# Gradio interface function
def interface(audio1, audio2, prompt1, prompt2):
    """Gradio callback: forward the four UI inputs to ``morph_audio``.

    The negative prompts are left at ``morph_audio``'s defaults, and its
    result is returned unchanged.
    """
    return morph_audio(audio1, audio2, prompt1, prompt2)
# Gradio Interface
# Inputs: two audio uploads (delivered to the callback as file paths) followed
# by one text prompt per clip. Outputs: five audio players.
demo = gr.Interface(
    fn=interface,
    inputs=[
        *(
            gr.Audio(label=caption, type="filepath")
            for caption in ("Upload Audio File 1", "Upload Audio File 2")
        ),
        # gr.Slider(4, 6, step=1, label="Octave 1"),
        *(
            gr.Textbox(label=caption)
            for caption in ("Prompt for Audio File 1", "Prompt for Audio File 2")
        ),
    ],
    outputs=[
        gr.Audio(label=caption)
        for caption in (
            "Morphing audio 1",
            "Morphing audio 2",
            "Morphing audio 3",
            "Morphing audio 4",
            "Morphing audio 5",
        )
    ],
)

# Start the web UI only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()