import os

import gradio as gr
import numpy as np
import torch
import torchaudio
from huggingface_hub import hf_hub_download

# Fetch the pre-trained AudioMAE weight files into the working directory.
# NOTE(review): this is done before importing the morphing pipeline below,
# presumably because that module expects the files to exist locally --
# keep this ordering unless confirmed otherwise.
for _weights_file in ("pretrained.pth", "pytorch_model.bin"):
    model_path = hf_hub_download(
        repo_id="DennisHung/Pre-trained_AudioMAE_weights",
        filename=_weights_file,
        local_dir="./",
        local_dir_use_symlinks=False,
    )

from pipeline.morph_pipeline_successed_ver1 import AudioLDM2MorphPipeline

# Initialize AudioLDM2 Pipeline
pipeline = AudioLDM2MorphPipeline.from_pretrained(
    "cvssp/audioldm2-large", torch_dtype=torch.float32
)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
pipeline.to(device)


def _wav_mtimes(directory):
    """Map each ``.wav`` path directly inside *directory* to its mtime (ns)."""
    return {
        os.path.join(directory, name): os.stat(
            os.path.join(directory, name)
        ).st_mtime_ns
        for name in os.listdir(directory)
        if name.endswith(".wav")
    }


# Audio morphing function
def morph_audio(
    audio_file1,
    audio_file2,
    prompt1,
    prompt2,
    negative_prompt1="Low quality",
    negative_prompt2="Low quality",
):
    """Morph between two audio clips and return the generated ``.wav`` paths.

    Args:
        audio_file1: Path of the first clip; its length sets the length of
            the generated audio.
        audio_file2: Path of the second clip.
        prompt1: Text prompt paired with the first clip.
        prompt2: Text prompt paired with the second clip.
        negative_prompt1: Negative prompt paired with the first clip.
        negative_prompt2: Negative prompt paired with the second clip.

    Returns:
        A sorted list of ``.wav`` paths in the ``output`` directory that were
        created or rewritten by this call. If none can be identified as new,
        every ``.wav`` in the directory is returned (sorted).

    Raises:
        gr.Error: If either audio file is missing.
    """
    if not audio_file1 or not audio_file2:
        # Gradio passes None for an empty Audio input; fail with a message
        # the UI can display instead of an opaque loader error.
        raise gr.Error("Please upload both audio files before morphing.")

    save_lora_dir = "output"
    os.makedirs(save_lora_dir, exist_ok=True)

    # Remember what was already on disk so results of earlier runs are not
    # returned together with the new ones.
    before = _wav_mtimes(save_lora_dir)

    # Load audio and compute duration (whole seconds of the first clip).
    waveform, sample_rate = torchaudio.load(audio_file1)
    # int() truncates, so a sub-second clip would request 0 seconds of
    # audio; clamp to at least one second.
    duration = max(1, int(waveform.shape[1] / sample_rate))

    # Perform morphing using the pipeline; its return value is not used here,
    # the results are collected from the output directory below.
    pipeline(
        audio_file=audio_file1,
        audio_file2=audio_file2,
        audio_length_in_s=duration,
        time_pooling=2,
        freq_pooling=2,
        prompt_1=prompt1,
        prompt_2=prompt2,
        negative_prompt_1=negative_prompt1,
        negative_prompt_2=negative_prompt2,
        save_lora_dir=save_lora_dir,
        use_adain=True,
        use_reschedule=False,
        num_inference_steps=50,
        lamd=0.6,
        output_path=save_lora_dir,
        num_frames=5,
        fix_lora=None,
        use_lora=True,
        lora_steps=50,
        noisy_latent_with_lora=True,
        morphing_with_lora=True,
        use_morph_prompt=True,
        guidance_scale=7.5,
    )

    # Collect the output file paths. os.listdir() order is arbitrary, so sort
    # to keep the mapping onto the UI's output slots deterministic.
    after = _wav_mtimes(save_lora_dir)
    fresh = sorted(
        path for path, mtime in after.items() if before.get(path) != mtime
    )
    # Fall back to the full listing if nothing could be identified as new.
    return fresh or sorted(after)


# Gradio interface function
def interface(audio1, audio2, prompt1, prompt2):
    """Gradio callback: run ``morph_audio`` with the default negative prompts."""
    return morph_audio(audio1, audio2, prompt1, prompt2)
# Gradio Interface
# Two uploaded clips (handed to the callback as file paths) plus one text
# prompt per clip go in; five audio players come out.
# NOTE: an "Octave 1" slider (gr.Slider(4, 6, step=1)) was sketched here
# earlier but is not wired into the interface.
_audio_inputs = [
    gr.Audio(label=f"Upload Audio File {slot}", type="filepath")
    for slot in (1, 2)
]
_prompt_inputs = [
    gr.Textbox(label=f"Prompt for Audio File {slot}") for slot in (1, 2)
]
_morph_outputs = [
    gr.Audio(label=f"Morphing audio {slot}") for slot in range(1, 6)
]

demo = gr.Interface(
    fn=interface,
    inputs=_audio_inputs + _prompt_inputs,
    outputs=_morph_outputs,
)


if __name__ == "__main__":
    demo.launch()