import os

import gradio as gr
from audio_separator.separator import Separator

from lib.infer import infer_audio

# Define a function to handle the entire separation process
def separate_audio(input_audio, output_dir, model_voc_inst, model_deecho, model_back_voc):
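    """Run a three-pass separation chain on input_audio:
    1) split into vocals and instrumental,
    2) remove echo/reverb from the vocals,
    3) split the dry vocals into lead and backing vocals.

    The model arguments are checkpoint filenames resolvable by
    audio-separator. Returns the six output paths in the order the
    UI components expect.
    """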
    # Create output directory if it doesn't exist
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    separator = Separator(output_dir=output_dir)

    # Define output files
    vocals = os.path.join(output_dir, 'Vocals.wav')
    instrumental = os.path.join(output_dir, 'Instrumental.wav')
    vocals_reverb = os.path.join(output_dir, 'Vocals (Reverb).wav')
    vocals_no_reverb = os.path.join(output_dir, 'Vocals (No Reverb).wav')
    lead_vocals = os.path.join(output_dir, 'Lead Vocals.wav')
    backing_vocals = os.path.join(output_dir, 'Backing Vocals.wav')

    # Splitting a track into Vocal and Instrumental
    separator.load_model(model_filename=model_voc_inst)
    voc_inst = separator.separate(input_audio)
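    # separate() returns the generated filenames (relative to output_dir);
    # rename each stem to a stable, human-readable name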
    os.rename(os.path.join(output_dir, voc_inst[0]), instrumental)  # Rename to "Instrumental.wav"
    os.rename(os.path.join(output_dir, voc_inst[1]), vocals)        # Rename to "Vocals.wav"

    # Applying DeEcho-DeReverb to Vocals
    separator.load_model(model_filename=model_deecho)
    voc_no_reverb = separator.separate(vocals)
    os.rename(os.path.join(output_dir, voc_no_reverb[0]), vocals_no_reverb)  # Rename to "Vocals (No Reverb).wav"
    os.rename(os.path.join(output_dir, voc_no_reverb[1]), vocals_reverb)     # Rename to "Vocals (Reverb).wav"

    # Separating Back Vocals from Main Vocals
    separator.load_model(model_filename=model_back_voc)
    backing_voc = separator.separate(vocals_no_reverb)
    os.rename(os.path.join(output_dir, backing_voc[0]), backing_vocals)  # Rename to "Backing Vocals.wav"
    os.rename(os.path.join(output_dir, backing_voc[1]), lead_vocals)     # Rename to "Lead Vocals.wav"

    return instrumental, vocals, vocals_reverb, vocals_no_reverb, lead_vocals, backing_vocals


# Main function to process audio (Inference)
def process_audio(MODEL_NAME, SOUND_PATH, F0_CHANGE, F0_METHOD, MIN_PITCH, MAX_PITCH, CREPE_HOP_LENGTH, INDEX_RATE, 
                  FILTER_RADIUS, RMS_MIX_RATE, PROTECT, SPLIT_INFER, MIN_SILENCE, SILENCE_THRESHOLD, SEEK_STEP, 
                  KEEP_SILENCE, FORMANT_SHIFT, QUEFRENCY, TIMBRE, F0_AUTOTUNE, OUTPUT_FORMAT, upload_audio=None):
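    """Thin wrapper around infer_audio for the Gradio UI: resolves the
    input path (textbox or uploaded file) and forwards all tuning
    parameters unchanged."""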

    # If no path was typed, fall back to the uploaded file; gr.File with
    # type="filepath" hands the upload over as a path string, so it can be
    # used directly (the previous .name/.read() calls only work on file objects)
    if not SOUND_PATH and upload_audio is not None:
        SOUND_PATH = upload_audio
    
    # A model name is required; raising gr.Error surfaces the message in the
    # UI instead of feeding an error string into the Audio output component
    if not MODEL_NAME:
        raise gr.Error("Please provide a model name.")

    # Make sure the bundled stftpitchshift binary is executable, then run inference
    os.system("chmod +x stftpitchshift")
    inferred_audio = infer_audio(
        MODEL_NAME,
        SOUND_PATH,
        F0_CHANGE,
        F0_METHOD,
        MIN_PITCH,
        MAX_PITCH,
        CREPE_HOP_LENGTH,
        INDEX_RATE,
        FILTER_RADIUS,
        RMS_MIX_RATE,
        PROTECT,
        SPLIT_INFER,
        MIN_SILENCE,
        SILENCE_THRESHOLD,
        SEEK_STEP,
        KEEP_SILENCE,
        FORMANT_SHIFT,
        QUEFRENCY,
        TIMBRE,
        F0_AUTOTUNE,
        OUTPUT_FORMAT
    )
    
    return inferred_audio


# Gradio Blocks Interface with Tabs
with gr.Blocks(title="Hex RVC") as app:
    gr.Markdown("# Hex RVC")
    
    with gr.Tab("Audio Separation"):
        with gr.Row():
            # Gradio 4.x expects `sources` as a list (the older `source` kwarg was removed)
            input_audio = gr.Audio(sources=["upload"], type="filepath", label="Upload Audio File")
            output_dir = gr.Textbox(value="/content/output", label="Output Directory")
        
        with gr.Row():
            model_voc_inst = gr.Textbox(value='model_bs_roformer_ep_317_sdr_12.9755.ckpt', label="Vocal & Instrumental Model")
            model_deecho = gr.Textbox(value='UVR-DeEcho-DeReverb.pth', label="DeEcho-DeReverb Model")
            model_back_voc = gr.Textbox(value='mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt', label="Backing Vocals Model")
        
        separate_button = gr.Button("Separate Audio")
        
        with gr.Row():
            instrumental_out = gr.Audio(label="Instrumental")
            vocals_out = gr.Audio(label="Vocals")
            vocals_reverb_out = gr.Audio(label="Vocals (Reverb)")
            vocals_no_reverb_out = gr.Audio(label="Vocals (No Reverb)")
            lead_vocals_out = gr.Audio(label="Lead Vocals")
            backing_vocals_out = gr.Audio(label="Backing Vocals")
        
        separate_button.click(
            separate_audio,
            inputs=[input_audio, output_dir, model_voc_inst, model_deecho, model_back_voc],
            outputs=[instrumental_out, vocals_out, vocals_reverb_out, vocals_no_reverb_out, lead_vocals_out, backing_vocals_out]
        )
    
    with gr.Tab("Inference"):
        with gr.Row():
            MODEL_NAME = gr.Textbox(label="Model Name", placeholder="Enter model name")
            SOUND_PATH = gr.Textbox(label="Audio Path (Optional)", placeholder="Leave blank to upload audio")
            upload_audio = gr.File(label="Upload Audio", type='filepath', file_types=["audio"])
        
        with gr.Row():
            F0_CHANGE = gr.Number(label="Pitch Change (semitones)", value=0)
            F0_METHOD = gr.Dropdown(choices=["crepe", "harvest", "mangio-crepe", "rmvpe", "rmvpe+", "fcpe", 
                                             "hybrid[mangio-crepe+rmvpe]", "hybrid[mangio-crepe+fcpe]", 
                                             "hybrid[rmvpe+fcpe]", "hybrid[mangio-crepe+rmvpe+fcpe]"], 
                                    label="F0 Method", value="fcpe")
        
        with gr.Row():
            MIN_PITCH = gr.Textbox(label="Min Pitch", value="50")
            MAX_PITCH = gr.Textbox(label="Max Pitch", value="1100")
            CREPE_HOP_LENGTH = gr.Number(label="Crepe Hop Length", value=120)
            INDEX_RATE = gr.Slider(label="Index Rate", minimum=0, maximum=1, value=0.75)
            FILTER_RADIUS = gr.Number(label="Filter Radius", value=3)
            RMS_MIX_RATE = gr.Slider(label="RMS Mix Rate", minimum=0, maximum=1, value=0.25)
            PROTECT = gr.Slider(label="Protect", minimum=0, maximum=1, value=0.33)
        
        with gr.Accordion("Advanced Settings", open=False):
            SPLIT_INFER = gr.Checkbox(label="Enable Split Inference", value=False)
            MIN_SILENCE = gr.Number(label="Min Silence (ms)", value=500)
            SILENCE_THRESHOLD = gr.Number(label="Silence Threshold (dBFS)", value=-50)
            SEEK_STEP = gr.Slider(label="Seek Step (ms)", minimum=1, maximum=10, value=1)
            KEEP_SILENCE = gr.Number(label="Keep Silence (ms)", value=200)
            FORMANT_SHIFT = gr.Checkbox(label="Enable Formant Shift", value=False)
            QUEFRENCY = gr.Number(label="Quefrency", value=0)
            TIMBRE = gr.Number(label="Timbre", value=1)
            F0_AUTOTUNE = gr.Checkbox(label="Enable F0 Autotune", value=False)
            OUTPUT_FORMAT = gr.Dropdown(choices=["wav", "flac", "mp3"], label="Output Format", value="wav")
        
        run_button = gr.Button("Run Inference")
        output_audio = gr.Audio(label="Generated Audio", type='filepath')

        run_button.click(
            process_audio, 
            inputs=[MODEL_NAME, SOUND_PATH, F0_CHANGE, F0_METHOD, MIN_PITCH, MAX_PITCH, CREPE_HOP_LENGTH, INDEX_RATE, 
                    FILTER_RADIUS, RMS_MIX_RATE, PROTECT, SPLIT_INFER, MIN_SILENCE, SILENCE_THRESHOLD, SEEK_STEP, 
                    KEEP_SILENCE, FORMANT_SHIFT, QUEFRENCY, TIMBRE, F0_AUTOTUNE, OUTPUT_FORMAT, upload_audio], 
            outputs=output_audio
        )

# Launch the Gradio app
app.launch()
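# When running in Colab (the default "/content/output" path suggests it),
# a public link may be needed: app.launch(share=True)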