File size: 13,165 Bytes
3b7b011
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
import sys

sys.path.append("..")
import os
import shutil

now_dir = os.getcwd()
import soundfile as sf
import librosa
from lib.tools import audioEffects
from assets.i18n.i18n import I18nAuto

i18n = I18nAuto()
import gradio as gr
import tabs.resources as resources
import numpy as np
from scipy.signal import resample

def save_to_wav2(dropbox):
    """Move an uploaded file into ``assets/audios`` and return its new path.

    Args:
        dropbox: Object exposing the uploaded file's location as ``.name``.

    Returns:
        The destination path (relative, under ``assets/audios``).
    """
    source = dropbox.name
    destination = os.path.join("assets", "audios", os.path.basename(source))

    # A previous upload with the same name is discarded before the move.
    already_present = os.path.exists(destination)
    if already_present:
        os.remove(destination)
        print("Replacing old dropdown file...")

    shutil.move(source, destination)
    return destination


audio_root = "assets/audios"
audio_others_root = "assets/audios/audio-others"
# Supported audio file extensions (lower-case, without the leading dot).
sup_audioext = {
    "wav",
    "mp3",
    "flac",
    "ogg",
    "opus",
    "m4a",
    "mp4",
    "aac",
    "alac",
    "wma",
    "aiff",
    "webm",
    "ac3",
}


def _list_audio_files(folder):
    """Return the supported audio files located directly inside *folder*.

    Sub-directories are not searched. A missing folder yields an empty list.
    A file qualifies when its extension (text after the last dot, compared
    case-insensitively) is in ``sup_audioext``.
    """
    # The first entry produced by os.walk() describes *folder* itself, which
    # is all we need; taking only that entry avoids descending into chunk and
    # output sub-folders. os.walk() yields nothing for a missing folder.
    _, _, files = next(os.walk(folder), (folder, [], []))
    return [
        os.path.join(folder, name)
        for name in files
        if os.path.splitext(name)[1][1:].lower() in sup_audioext
    ]


# Snapshot taken at import time; used as the initial dropdown choices.
audio_paths = _list_audio_files(audio_root)

audio_others_paths = _list_audio_files(audio_others_root)


def change_choices3():
    """Rescan both audio folders and build dropdown update payloads.

    Returns:
        A 2-tuple of update dicts: first the sorted files of
        ``audio_others_root``, then the sorted files of ``audio_root``.
    """
    return (
        {"choices": sorted(_list_audio_files(audio_others_root)), "__type__": "update"},
        {"choices": sorted(_list_audio_files(audio_root)), "__type__": "update"},
    )


def generate_output_path(output_folder, base_name, extension):
    """Return the first unused ``<base_name>_<n>.<extension>`` path in a folder.

    Numbering starts at 1 and increases until a path that does not exist yet
    is found. Nothing is created on disk.
    """
    counter = 1
    candidate = os.path.join(output_folder, f"{base_name}_{counter}.{extension}")
    while os.path.exists(candidate):
        counter += 1
        candidate = os.path.join(output_folder, f"{base_name}_{counter}.{extension}")
    return candidate

from pydub import AudioSegment
from pydub.silence import detect_nonsilent
import glob
import re
def _apply_volume_factor(segment, factor):
    """Scale the loudness of *segment* by a linear amplitude *factor*."""
    import math  # local import: only needed for the ratio -> dB conversion

    if factor == 1.0:
        return segment
    if factor <= 0:
        # log10 is undefined for non-positive ratios; treat them as "mute".
        return AudioSegment.silent(
            duration=len(segment), frame_rate=segment.frame_rate
        )
    # pydub expresses gain in dB; `segment * n` would repeat the audio instead
    # of making it louder.
    return segment.apply_gain(20 * math.log10(factor))


def combine_and_save_audios(
    audio1_path, audio2_path, output_path, volume_factor_audio1, volume_factor_audio2
):
    """Overlay two audio files and export the mix as a WAV file.

    The shorter track is padded with trailing silence so that both tracks
    have the same length before they are mixed; otherwise ``overlay`` would
    cut the second track at the length of the first one.

    Args:
        audio1_path: Path of the base track.
        audio2_path: Path of the track overlaid on top of the base track.
        output_path: Destination of the exported WAV file.
        volume_factor_audio1: Linear volume factor for the first track
            (1.0 = unchanged, values <= 0 mute the track).
        volume_factor_audio2: Linear volume factor for the second track.
    """
    audio1 = AudioSegment.from_file(audio1_path)
    audio2 = AudioSegment.from_file(audio2_path)

    # len() of an AudioSegment is its duration in milliseconds, which is also
    # the unit AudioSegment.silent() expects, so no conversion is needed.
    diff_duration_ms = len(audio1) - len(audio2)
    print(f"diff_duration_seconds: {abs(diff_duration_ms) / 1000.0} seconds")

    if diff_duration_ms > 0:
        # The second track is shorter: pad it up to the first track's length.
        audio2 = audio2 + AudioSegment.silent(
            duration=diff_duration_ms, frame_rate=audio2.frame_rate
        )
    elif diff_duration_ms < 0:
        # The first track is shorter: pad it up to the second track's length.
        audio1 = audio1 + AudioSegment.silent(
            duration=-diff_duration_ms, frame_rate=audio1.frame_rate
        )

    # Adjust the volume of each track by its gain factor.
    audio1 = _apply_volume_factor(audio1, volume_factor_audio1)
    audio2 = _apply_volume_factor(audio2, volume_factor_audio2)

    # Mix both tracks.
    combined_audio = audio1.overlay(audio2)

    # Save the mix to the output file.
    combined_audio.export(output_path, format="wav")


def audio_combined(
    audio1_path,
    audio2_path,
    volume_factor_audio1=1.0,
    volume_factor_audio2=1.0,
    reverb_enabled=False,
    compressor_enabled=False,
    noise_gate_enabled=False,
):
    """Mix two audio files, optionally running effects on the second one first.

    When at least one effect flag is set, ``audio2_path`` is passed through
    ``audioEffects.process_audio`` (written to a fresh ``effect_audio_<n>.wav``)
    and the processed file is used for the mix instead of the original.

    Returns:
        A tuple ``(status message, path of the combined WAV file)``.
    """
    extension = "wav"
    output_folder = os.path.join(now_dir, "assets", "audios", "audio-outputs")
    os.makedirs(output_folder, exist_ok=True)

    # Echo the three effect switches to the console.
    for flag in (reverb_enabled, compressor_enabled, noise_gate_enabled):
        print(flag)

    # By default the second track is mixed untouched.
    second_track = audio2_path
    if any((reverb_enabled, compressor_enabled, noise_gate_enabled)):
        effect_path = generate_output_path(output_folder, "effect_audio", extension)
        second_track = audioEffects.process_audio(
            audio2_path,
            effect_path,
            reverb_enabled,
            compressor_enabled,
            noise_gate_enabled,
        )

    # Pick a unique name for the final mix and write it.
    output_path = generate_output_path(output_folder, "combined_audio", extension)
    combine_and_save_audios(
        audio1_path,
        second_track,
        output_path,
        volume_factor_audio1,
        volume_factor_audio2,
    )

    return i18n("Conversion complete!"), output_path

def process_audio(file_path):
    """Split an audio file into its non-silent chunks and log their start times.

    Chunks are exported as ``chunk<i>.wav`` into a directory created next to
    the input file, and one line per chunk is written to
    ``<name>_timestamps.txt`` beside it (the format ``merge_audio`` parses).

    Args:
        file_path: Path of the audio file to split.

    Returns:
        ``("Finish", chunk_directory)`` on success, ``("Error", None)`` if any
        step raised; the error is printed rather than propagated.
    """
    try:
        # load audio file
        song = AudioSegment.from_file(file_path)

        print("Ignore the warning if you saw any...")

        # set silence threshold and duration
        silence_thresh = -70  # dB
        min_silence_len = 750  # ms, adjust as needed

        # detect nonsilent parts
        nonsilent_parts = detect_nonsilent(
            song, min_silence_len=min_silence_len, silence_thresh=silence_thresh
        )

        # Create a new directory to store chunks.
        # NOTE: the name is cut at the FIRST dot, so "a.b.wav" maps to "a".
        file_dir = os.path.dirname(file_path)
        file_name = os.path.basename(file_path).split('.')[0]
        new_dir_path = os.path.join(file_dir, file_name)
        os.makedirs(new_dir_path, exist_ok=True)

        # Drop a stale timestamps file so no outdated entries survive, even
        # when this run produces no chunks at all.
        timestamps_file = os.path.join(file_dir, f"{file_name}_timestamps.txt")
        if os.path.isfile(timestamps_file):
            os.remove(timestamps_file)

        # export chunks and save start times
        segment_count = 0
        if nonsilent_parts:
            # Open the timestamps file once instead of once per chunk.
            with open(timestamps_file, "w", encoding="utf-8") as f:
                for i, (start_i, end_i) in enumerate(nonsilent_parts):
                    chunk = song[start_i:end_i]
                    chunk_file_path = os.path.join(new_dir_path, f"chunk{i}.wav")
                    chunk.export(chunk_file_path, format="wav")

                    print(f"Segment {i} created!")
                    segment_count += 1

                    # write start time of this chunk
                    f.write(f"{chunk_file_path} starts at {start_i} ms\n")

        print(f"Total segments created: {segment_count}")
        print(f"Split all chunks for {file_path} successfully!")

        return "Finish", new_dir_path

    except Exception as e:
        # Best-effort boundary: report the failure and signal it to the caller.
        print(f"An error occurred: {e}")
        return "Error", None


def merge_audio(timestamps_file):
    """Rebuild one audio file from chunks listed in a timestamps file.

    Each line of the form ``...chunk<i>.wav starts at <ms> ms`` names a chunk
    stored in ``<dir>/<prefix>/`` (``<prefix>`` being the timestamps file name
    without ``_timestamps.txt``). Chunks are placed at their start times with
    silence filling the gaps, and the result is exported to
    ``<dir>/audio-outputs/<prefix>_merged.wav``.

    Args:
        timestamps_file: Path of the ``<prefix>_timestamps.txt`` file.

    Returns:
        None. Errors are printed rather than raised.
    """
    try:
        # Extract prefix from the timestamps filename
        prefix = os.path.basename(timestamps_file).replace('_timestamps.txt', '')
        timestamps_dir = os.path.dirname(timestamps_file)
        print(timestamps_dir)
        print(prefix)

        # Open the timestamps file
        with open(timestamps_file, "r", encoding="utf-8") as f:
            lines = f.readlines()

        # Initialize empty list to hold audio segments
        audio_segments = []
        last_end_time = 0

        print(f"Processing file: {timestamps_file}")

        # The dot is escaped so only real "chunkN.wav" names match.
        chunk_pattern = re.compile(r"(chunk\d+\.wav) starts at (\d+) ms")

        for line in lines:
            # Extract filename and start time from line
            match = chunk_pattern.search(line)
            if not match:
                continue

            filename, start_time = match.groups()
            start_time = int(start_time)

            # Construct the complete path to the chunk file
            chunk_file = os.path.join(timestamps_dir, prefix, filename)

            # Add silence from last_end_time to start_time
            silence_duration = max(start_time - last_end_time, 0)
            audio_segments.append(AudioSegment.silent(duration=silence_duration))

            # Load audio file and append to list
            audio = AudioSegment.from_wav(chunk_file)
            audio_segments.append(audio)

            # Update last_end_time
            last_end_time = start_time + len(audio)

            print(f"Processed chunk: {chunk_file}")

        if not audio_segments:
            # sum([]) would be the int 0, which cannot be exported.
            print(f"No chunk entries found in {timestamps_file}; nothing to merge.")
            return

        # Concatenate all audio_segments and export
        merged_filename = f"{prefix}_merged.wav"
        output_dir = os.path.join(timestamps_dir, "audio-outputs")
        # The export fails if the output folder does not exist yet.
        os.makedirs(output_dir, exist_ok=True)
        merged_audio = sum(audio_segments)
        merged_audio.export(os.path.join(output_dir, merged_filename), format="wav")

        print(f"Exported merged file: {merged_filename}\n")

    except Exception as e:
        # Best-effort boundary: report the failure instead of crashing the UI.
        print(f"An error occurred: {e}")




def merge_audios():
        """Build the "merge" UI section and wire up its event handlers.

        Creates the gradio components for choosing an instrumental and a
        generated audio, setting their volumes and optional effects, and
        showing the merged result. Returns nothing; the components are only
        instantiated.

        NOTE(review): presumably called inside an active gradio layout
        context (e.g. ``gr.Blocks``/tab) -- confirm against the caller.
        """
        gr.Markdown(
            value="## " + i18n("Merge your generated audios with the instrumental")
        )
        with gr.Row():
            with gr.Column():
                # Upload widget; see the `dropbox.upload` handler below.
                dropbox = gr.File(label=i18n("Drag your audio here:"))
                gr.Markdown(value=i18n("### Instrumental settings:"))
                # Choices come from the import-time scan of `audio_others_root`.
                input_audio1 = gr.Dropdown(
                    label=i18n("Choose your instrumental:"),
                    choices=sorted(audio_others_paths),
                    value="",
                    interactive=True,
                )
                input_audio1_scale = gr.Slider(
                    minimum=0,
                    maximum=10,
                    label=i18n("Volume of the instrumental audio:"),
                    value=1.00,
                    interactive=True,
                )
                gr.Markdown(value=i18n("### Audio settings:"))
                # Choices come from the import-time scan of `audio_root`.
                input_audio3 = gr.Dropdown(
                    label=i18n("Select the generated audio"),
                    choices=sorted(audio_paths),
                    value="",
                    interactive=True,
                )
                with gr.Row():
                    input_audio3_scale = gr.Slider(
                        minimum=0,
                        maximum=10,
                        label=i18n("Volume of the generated audio:"),
                        value=1.00,
                        interactive=True,
                    )

                # Effect switches, forwarded to `audio_combined` as its three
                # `*_enabled` flags.
                gr.Markdown(value=i18n("### Add the effects:"))
                reverb_ = gr.Checkbox(
                    label=i18n("Reverb"),
                    value=False,
                    interactive=True,
                )
                compressor_ = gr.Checkbox(
                    label=i18n("Compressor"),
                    value=False,
                    interactive=True,
                )
                noise_gate_ = gr.Checkbox(
                    label=i18n("Noise Gate"),
                    value=False,
                    interactive=True,
                )
                with gr.Row():
                    butnone = gr.Button(i18n("Merge"), variant="primary").style(
                        full_width=True
                    )
                    refresh_button = gr.Button(
                        i18n("Refresh"), variant="primary"
                    ).style(full_width=True)

                # Outputs of the merge: status text and the resulting audio.
                vc_output1 = gr.Textbox(label=i18n("Output information:"))
                vc_output2 = gr.Audio(
                    label=i18n(
                        "Export audio (click on the three dots in the lower right corner to download)"
                    ),
                    type="filepath",
                )

                # An uploaded file is moved by `save_to_wav2`; the returned
                # path becomes the value of the instrumental dropdown.
                dropbox.upload(
                    fn=save_to_wav2, inputs=[dropbox], outputs=[input_audio1]
                )

                # Rescan both folders; `change_choices3` returns the updates in
                # the order (instrumental choices, generated-audio choices).
                refresh_button.click(
                    fn=lambda: change_choices3(),
                    inputs=[],
                    outputs=[input_audio1, input_audio3],
                )

                # Inputs are passed positionally, matching the parameter order
                # of `audio_combined`.
                butnone.click(
                    fn=audio_combined,
                    inputs=[
                        input_audio1,
                        input_audio3,
                        input_audio1_scale,
                        input_audio3_scale,
                        reverb_,
                        compressor_,
                        noise_gate_,
                    ],
                    outputs=[vc_output1, vc_output2],
                )