import os
import binascii
import warnings
import json
import argparse
import copy
import numpy as np
import matplotlib.pyplot as plt
import torch
import tqdm
import librosa
import librosa.display
import soundfile as sf
import gradio as gr
import pytube as pt
from pytube.exceptions import VideoUnavailable
from inference.style_transfer import *
# Working directory for audio fetched from YouTube; created at import time
# so the download helpers can write into it without checking first.
yt_video_dir = "./yt_dir/0"
os.makedirs(name=yt_video_dir, exist_ok=True)
def get_audio_from_yt_video_input(yt_link: str):
    """Download the audio of a YouTube video to use as the input track.

    The first audio-only stream reported by pytube is written to
    ``input.wav`` inside ``yt_video_dir``.

    Args:
        yt_link: URL of the YouTube video.

    Returns:
        A 2-tuple holding the downloaded file path twice (one value per
        Gradio output component wired to this callback), or
        ``(None, None)`` when the link is blank, cannot be parsed, the video
        is unavailable, or no audio-only stream exists.
    """
    # A blank textbox would otherwise reach pytube and fail URL parsing.
    if not yt_link or not yt_link.strip():
        warnings.warn("No YouTube link provided")
        return None, None

    link = yt_link.strip()
    filename_in = os.path.join(yt_video_dir, "input.wav")
    try:
        yt = pt.YouTube(link)
        # first() yields None on an empty query instead of raising IndexError.
        audio_stream = yt.streams.filter(only_audio=True).first()
        if audio_stream is None:
            warnings.warn(f"No audio stream found at {link}")
            return None, None
        # NOTE(review): the stream is stored under a .wav name without any
        # transcoding in this function - confirm downstream readers cope.
        audio_stream.download(filename=filename_in)
    except (VideoUnavailable, pt.exceptions.RegexMatchError) as e:
        # RegexMatchError is what pytube raises for a malformed link.
        warnings.warn(f"Video Not Found at {link} ({e})")
        filename_in = None
    return filename_in, filename_in
def get_audio_from_yt_video_ref(yt_link: str):
    """Download the audio of a YouTube video to use as the reference track.

    The first audio-only stream reported by pytube is written to
    ``reference.wav`` inside ``yt_video_dir``.

    Args:
        yt_link: URL of the YouTube video.

    Returns:
        A 2-tuple holding the downloaded file path twice (one value per
        Gradio output component wired to this callback), or
        ``(None, None)`` when the link is blank, cannot be parsed, the video
        is unavailable, or no audio-only stream exists.
    """
    # A blank textbox would otherwise reach pytube and fail URL parsing.
    if not yt_link or not yt_link.strip():
        warnings.warn("No YouTube link provided")
        return None, None

    link = yt_link.strip()
    filename_ref = os.path.join(yt_video_dir, "reference.wav")
    try:
        yt = pt.YouTube(link)
        # first() yields None on an empty query instead of raising IndexError.
        audio_stream = yt.streams.filter(only_audio=True).first()
        if audio_stream is None:
            warnings.warn(f"No audio stream found at {link}")
            return None, None
        # NOTE(review): the stream is stored under a .wav name without any
        # transcoding in this function - confirm downstream readers cope.
        audio_stream.download(filename=filename_ref)
    except (VideoUnavailable, pt.exceptions.RegexMatchError) as e:
        # RegexMatchError is what pytube raises for a malformed link.
        warnings.warn(f"Video Not Found at {link} ({e})")
        filename_ref = None
    return filename_ref, filename_ref
def inference(file_uploaded_in, file_uploaded_ref):
    """Run music mixing style transfer and return the result path twice.

    Args:
        file_uploaded_in: Path of the input track supplied by the UI.
        file_uploaded_ref: Path of the reference track supplied by the UI.

    Returns:
        A 2-tuple with the value returned by
        ``Mixing_Style_Transfer_Inference.inference`` repeated, one entry
        per Gradio output component.
    """
    # NOTE(review): neither argument is forwarded - ``None`` is passed for
    # both. Presumably the model locates its inputs through the configuration
    # built by set_up(); confirm against inference.style_transfer.
    style_transfer = Mixing_Style_Transfer_Inference(set_up())
    result_path = style_transfer.inference(None, None)
    return result_path, result_path
# Gradio UI: two source pickers (input and reference), each offering a file
# upload tab and a YouTube download tab, followed by the trigger button and
# the players that receive the style-transferred result.
with gr.Blocks() as demo:
    gr.HTML(
        """
Music Mixing Style Transfer
Hugging Face interactive demo of the paper "Music Mixing Style Transfer: A Contrastive Learning Approach to Disentangle Audio Effects" (ICASSP 2023).
a
"""
    )
    with gr.Group():
        with gr.Column():
            # Input track: upload directly or pull the audio from YouTube.
            with gr.Blocks():
                with gr.Tab("Input Music"):
                    file_uploaded_in = gr.Audio(
                        label="Input track (mix) to be mixing style transferred",
                        type="filepath",
                    )
                with gr.Tab("YouTube url"):
                    with gr.Row():
                        yt_link_in = gr.Textbox(
                            label="Enter YouTube Link of the Video", autofocus=True, lines=3
                        )
                        yt_btn_in = gr.Button("Download Audio from YouTube Link", size="lg")
                    yt_audio_path_in = gr.Audio(
                        label="Input Audio Extracted from the YouTube Video", interactive=False
                    )
                    # The download fills both the preview player and the
                    # upload component on the "Input Music" tab.
                    yt_btn_in.click(
                        get_audio_from_yt_video_input,
                        inputs=[yt_link_in],
                        outputs=[yt_audio_path_in, file_uploaded_in],
                    )
            # Reference track: same layout as the input track.
            with gr.Blocks():
                with gr.Tab("Reference Music"):
                    file_uploaded_ref = gr.Audio(
                        label="Reference track (mix) to copy mixing style",
                        type="filepath",
                    )
                with gr.Tab("YouTube url"):
                    with gr.Row():
                        yt_link_ref = gr.Textbox(
                            label="Enter YouTube Link of the Video", autofocus=True, lines=3
                        )
                        yt_btn_ref = gr.Button("Download Audio from YouTube Link", size="lg")
                    yt_audio_path_ref = gr.Audio(
                        label="Reference Audio Extracted from the YouTube Video", interactive=False
                    )
                    yt_btn_ref.click(
                        get_audio_from_yt_video_ref,
                        inputs=[yt_link_ref],
                        outputs=[yt_audio_path_ref, file_uploaded_ref],
                    )
    with gr.Column():
        inference_btn = gr.Button("Perform Style Transfer")
    with gr.Group():
        gr.HTML(
            """
Mixing Style Transferred Output.
"""
        )
        # Layout options go to the constructor: the .style() method is
        # deprecated and no longer exists in Gradio 4.
        with gr.Row(equal_height=True):
            output_mix_l = gr.Audio(label="Listen to output mix")
            output_mix_p = gr.Audio(label="Download output mix")
        with gr.Row():
            # NOTE(review): no callback writes to this component, so it
            # stays empty - confirm whether it should be an output of
            # inference_btn.click.
            output_mix = gr.File(label="Download style transferred music track")
    inference_btn.click(
        inference,
        inputs=[file_uploaded_in, file_uploaded_ref],
        outputs=[output_mix_l, output_mix_p],
    )
# Launch the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch(debug=True)