File size: 4,696 Bytes
6b95f60
77a1faa
 
 
6b95f60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166943a
 
6b95f60
e551a22
 
6b95f60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86c8cd3
166943a
 
 
 
 
 
6b95f60
86c8cd3
6b95f60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4534858
 
 
6b95f60
 
 
 
 
 
86c8cd3
 
 
6b95f60
 
4534858
6b95f60
 
86c8cd3
6b95f60
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import os
import binascii
import warnings

import json
import argparse
import copy

import numpy as np
import matplotlib.pyplot as plt
import torch
import tqdm
import librosa
import librosa.display
import soundfile as sf
import gradio as gr
import pytube as pt

from pytube.exceptions import VideoUnavailable

from inference.style_transfer import *


# Directory that get_audio_from_yt_video() downloads audio files into.
# It is created at import time if it does not exist yet.
yt_video_dir = "./yt_dir"
os.makedirs(yt_video_dir, exist_ok=True)


def get_audio_from_yt_video(yt_link: str):
    """Download the first audio-only stream of a YouTube video into ``yt_video_dir``.

    Args:
        yt_link: URL of the YouTube video. ``None``, empty and
            whitespace-only values are rejected up front, before pytube
            is called.

    Returns:
        A ``(filename, filename)`` tuple holding the same path twice, because
        the click handlers in this file bind this function to two output
        components. Both entries are ``None`` when no link was given, when
        the video is unavailable, or when it exposes no audio-only stream.
    """
    # Pressing the download button with an empty textbox would otherwise end
    # in an exception this function does not catch; report it the same way
    # as an unavailable video instead.
    if not yt_link or not yt_link.strip():
        warnings.warn("No YouTube link provided")
        return None, None

    try:
        yt = pt.YouTube(yt_link)
        # first() returns None for an empty query, whereas indexing with [0]
        # would raise IndexError when there is no audio-only stream.
        audio_stream = yt.streams.filter(only_audio=True).first()
        if audio_stream is None:
            warnings.warn(f"No audio-only stream found at {yt_link}")
            return None, None
        # Random 16-hex-character name so that downloads do not overwrite
        # each other.
        # NOTE(review): the file always gets a ".wav" suffix regardless of the
        # container of the downloaded stream -- confirm downstream readers
        # detect the format from content rather than from the extension.
        filename = os.path.join(yt_video_dir, binascii.hexlify(os.urandom(8)).decode() + ".wav")
        audio_stream.download(filename=filename)
    except VideoUnavailable as e:
        warnings.warn(f"Video Not Found at {yt_link} ({e})")
        filename = None

    return filename, filename


def inference(file_uploaded_in, file_uploaded_ref):
    """Run music mixing style transfer for the tracks selected in the UI.

    Args:
        file_uploaded_in: value of the "Input track" audio component (it is
            declared with ``type="filepath"`` in this file).
        file_uploaded_ref: value of the "Reference track" audio component
            (also ``type="filepath"``).

    Returns:
        A ``(output_wav_path, output_wav_path)`` tuple holding the same value
        twice, because the click handler binds this function to two output
        components.
    """
    # set_up and Mixing_Style_Transfer_Inference come from the star import of
    # inference.style_transfer.
    args = set_up()

    inference_style_transfer = Mixing_Style_Transfer_Inference(args)
    # Forward the tracks chosen by the user so the transfer actually runs on
    # them instead of on hard-coded None placeholders.
    # NOTE(review): assumes .inference() expects (input path, reference path)
    # in this order -- confirm against inference/style_transfer.py.
    output_wav_path = inference_style_transfer.inference(file_uploaded_in, file_uploaded_ref)

    return output_wav_path, output_wav_path


# Gradio UI: two track pickers (file upload or YouTube download), a button
# that triggers inference(), and the output players.
with gr.Blocks() as demo:
    # Page header (title and paper reference).
    gr.HTML(
        """
            <div style="text-align: center; max-width: 700px; margin: 0 auto;">
              <div
                style="
                  display: inline-flex;
                  align-items: center;
                  gap: 0.8rem;
                  font-size: 1.75rem;
                "
              >
                <h1 style="font-weight: 900; margin-bottom: 7px;">
                  Music Mixing Style Transfer
                </h1>
              </div>
              <p style="margin-bottom: 10px; font-size: 94%">
                Hugging Face interactive demo of the paper "Music Mixing Style Transfer: A Contrastive Learning Approach to Disentangle Audio Effects" (ICASSP 2023).<br>
              </p>
            </div>
        """
    )
    with gr.Group():
        with gr.Column():
            # --- Input track: direct upload or audio pulled from YouTube ---
            with gr.Blocks():
                with gr.Tab("Input Music"):
                    file_uploaded_in = gr.Audio(label="Input track (mix) to be mixing style transferred", type="filepath")
                with gr.Tab("YouTube url"):
                    with gr.Row():
                        yt_link_in = gr.Textbox(
                            label="Enter YouTube Link of the Video", autofocus=True, lines=3
                        )
                        yt_btn = gr.Button("Download Audio from YouTube Link", size="lg")
                    yt_audio_path = gr.Audio(
                        label="Input Audio Extracted from the YouTube Video", interactive=False
                    )
                    # The downloaded file is shown in the preview player and
                    # also loaded into the upload component that feeds
                    # inference().
                    yt_btn.click(
                        get_audio_from_yt_video,
                        inputs=[yt_link_in],
                        outputs=[yt_audio_path, file_uploaded_in],
                    )
            # --- Reference track: same layout as the input track ---
            # yt_btn / yt_audio_path are rebound below; the handler above was
            # already registered on the earlier objects, so it is unaffected.
            with gr.Blocks():
                with gr.Tab("Reference Music"):
                    file_uploaded_ref = gr.Audio(label="Reference track (mix) to copy mixing style", type="filepath")
                with gr.Tab("YouTube url"):
                    with gr.Row():
                        yt_link_ref = gr.Textbox(
                            label="Enter YouTube Link of the Video", autofocus=True, lines=3
                        )
                        yt_btn = gr.Button("Download Audio from YouTube Link", size="lg")
                    yt_audio_path = gr.Audio(
                        label="Reference Audio Extracted from the YouTube Video", interactive=False
                    )
                    yt_btn.click(
                        get_audio_from_yt_video,
                        inputs=[yt_link_ref],
                        outputs=[yt_audio_path, file_uploaded_ref],
                    )
            with gr.Column():
                inference_btn = gr.Button("Perform Style Transfer")

    with gr.Group():
        gr.HTML(
            """
            <div> <center> <h3> Mixing Style Transferred Output. </h3> </center> </div>
            """
        )
        # NOTE(review): .style() on layout components depends on the installed
        # Gradio version -- confirm it is supported by the pinned release.
        with gr.Row().style(mobile_collapse=False, equal_height=True):
            output_mix_l = gr.Audio(label="Listen to output mix")
            output_mix_p = gr.Audio(label="Download output mix")
        with gr.Row():
            # NOTE(review): output_mix is not listed in any handler's outputs
            # in this file, so it is never populated -- confirm whether it
            # should receive the result of inference().
            output_mix = gr.File(label="Download style transferred music track")
            # inference() returns the same path twice, one per audio player.
            inference_btn.click(
                inference,
                inputs=[file_uploaded_in, file_uploaded_ref],
                outputs=[output_mix_l, output_mix_p],
            )


    
# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(debug=True)