File size: 15,727 Bytes
d7ff226
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2fbfeb6
78dd807
717e2ff
e50325c
d7ff226
e50325c
93e5d9e
6979b00
d7ff226
e50325c
db19dc1
2c9b6e2
8091650
db19dc1
 
 
e50325c
d7ff226
 
e50325c
 
d7ff226
 
 
 
e50325c
d7ff226
 
 
e50325c
d7ff226
 
 
 
 
 
e50325c
d7ff226
 
 
e50325c
db725c9
 
e50325c
db725c9
 
d7ff226
e50325c
d7ff226
 
71ce4d9
088fe02
d7ff226
 
 
 
 
 
 
 
e50325c
d7ff226
 
3526004
 
d7ff226
 
e50325c
d7ff226
 
 
 
 
 
 
 
e50325c
d7ff226
 
 
e50325c
 
d7ff226
 
 
 
e50325c
 
d7ff226
 
 
 
e50325c
d7ff226
 
 
 
 
 
 
 
 
 
 
 
 
e50325c
d7ff226
 
 
e50325c
e835e58
 
 
 
 
e50325c
d7ff226
5e5dc03
 
d7ff226
 
 
570e595
e50325c
 
 
d7ff226
 
 
 
 
e50325c
 
 
 
 
 
d7ff226
e50325c
 
 
 
d7ff226
 
 
 
 
e50325c
d7ff226
25c18af
d7ff226
e835e58
004ce63
e50325c
d7ff226
e50325c
 
 
 
d7ff226
 
004ce63
d7ff226
e50325c
7f8a1b2
 
e50325c
dc8a999
 
 
 
e50325c
7f8a1b2
dc8a999
7f8a1b2
e50325c
dc8a999
25c18af
717e2ff
 
12bad8b
 
717e2ff
 
 
 
 
12bad8b
717e2ff
 
 
 
12bad8b
717e2ff
 
e50325c
5683224
25c18af
e50325c
 
e58a7cc
78dd807
e32c131
78dd807
 
5683224
e7c405a
78dd807
25c18af
78dd807
25c18af
7f8a1b2
d782c33
 
 
 
e7c405a
d782c33
 
 
e7c405a
78dd807
 
dc8a999
 
e50325c
 
 
25c18af
d7ff226
 
 
93e5d9e
 
 
25c18af
d7ff226
5e5dc03
d7ff226
 
25c18af
d7ff226
3526004
 
 
d7ff226
6499ba7
 
25c18af
e50325c
 
13ad538
e50325c
 
 
 
6499ba7
 
3526004
6499ba7
 
2385693
 
d511aa1
2385693
 
d7ff226
 
 
5e5dc03
d7ff226
 
e50325c
 
12bad8b
e50325c
7835f68
 
 
 
 
 
 
12bad8b
c6dfd57
d7ff226
e50325c
5e5dc03
e835e58
 
 
25c18af
c6dfd57
e50325c
d7ff226
2385693
18d307c
c6dfd57
e50325c
d6584d2
e50325c
12bad8b
d7ff226
e7c405a
d782c33
 
106b5c7
d782c33
 
 
 
106b5c7
d782c33
 
 
 
 
 
 
 
 
106b5c7
25c18af
 
d782c33
106b5c7
d782c33
e50325c
d782c33
25c18af
 
e50325c
 
 
 
c6dfd57
e50325c
 
 
 
 
 
 
 
 
 
c6dfd57
e50325c
 
 
c6dfd57
12bad8b
25c18af
 
e50325c
3526004
3947069
9335655
a561342
 
3947069
e835e58
004ce63
e50325c
 
 
3947069
2fbfeb6
9335655
25c18af
9335655
25c18af
e50325c
9335655
25c18af
d7ff226
e50325c
ab914b1
e50325c
 
 
 
25c18af
9335655
 
 
 
8c33769
e50325c
 
 
797a642
e50325c
 
797a642
 
 
9335655
 
8c33769
e50325c
 
 
 
 
 
 
 
 
9335655
e50325c
 
 
 
 
 
9335655
 
 
ab914b1
9335655
 
 
797a642
 
3947069
 
 
ab914b1
e50325c
 
797a642
ab914b1
d7ff226
 
6ecdd65
e50325c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
import gradio as gr
import librosa
from PIL import Image, ImageDraw, ImageFont
from mutagen.mp3 import MP3
from mutagen.id3 import ID3, APIC, TIT2, TPE1
import io
from colorthief import ColorThief
import colorsys
import math
import os
from multiprocessing import Pool, cpu_count
import tempfile
import ffmpeg
import subprocess
import traceback
import shutil
import LRC2SRT
import sys
import re

# Truthy until main() has run once; main() uses it to show a one-time
# "first file since startup" notice and then sets it to 0.
flag = 1

# Prefix prepended to the font files, the 'out/' frame directory and the
# output video name built in this module.
path = ""  # Update with your path


def safe_read(i: int, a: list):
    """Return ``a[i]``, or 128 when ``i`` lies outside the list.

    Callers subtract 128 from the result, so an out-of-range read behaves
    like a sample at the zero level. Negative indices are treated as out of
    range instead of wrapping around to the end of the list (or raising
    ``IndexError`` when they are smaller than ``-len(a)``).

    Args:
        i: Index to read.
        a: List of samples.

    Returns:
        The element at ``i`` when ``0 <= i < len(a)``, otherwise 128.
    """
    if 0 <= i < len(a):
        return a[i]
    return 128


def getRenderCords(ta: list, idx: int, res: int = 1024, size: tuple = (1280, 720)) -> list:
    """Build the waveform polyline for the window centred on ``idx``.

    The window covers sample positions ``idx - res // 2`` up to (but not
    including) ``idx + res // 2``. x starts at ``size[0] * .9 / -2`` and
    advances by ``size[0] * .9 / res`` per point; y is the sample's offset
    from 128 scaled by ``size[1] / 2000`` and shifted by ``size[1] * .7 / -2``.

    Returns:
        A list of ``(x, y)`` tuples, one per sample position in the window.
    """
    half_window = res // 2
    y_shift = size[1] * .7 / -2

    def height_at(position):
        # Offset from the 128 mid level, scaled and shifted vertically.
        return (safe_read(position, ta) - 128) * (size[1] / 2000) + y_shift

    x = size[0] * .9 / -2
    y = height_at(idx - half_window)
    points = []
    for position in range(idx - half_window, idx + half_window):
        points.append((x, y))
        y = height_at(position + 1)
        x += (size[0] * .9) / res
    return points


def center_to_top_left(coords, width=1280, height=720):
    """Convert every ``(x, y)`` pair in ``coords`` with ``totopleft``.

    Returns:
        A new list with one converted pair per input pair, in order.
    """
    return [totopleft((x, y), width=width, height=height) for x, y in coords]


def totopleft(coord, width=1280, height=720):
    """Map a centre-origin, y-up coordinate to a top-left-origin, y-down one.

    Returns:
        ``(x + width / 2, height / 2 - y)`` for ``coord == (x, y)``.
    """
    centred_x, centred_y = coord[0], coord[1]
    return centred_x + width / 2, height / 2 - centred_y


def getTrigger(ad: int, a: list, max: int = 1024) -> int:
    """Scan forward from ``ad`` for a trigger position.

    A position ``p`` triggers when the sample at ``p`` is below 126 and the
    sample ten positions later is below 130. The scan also stops once it has
    moved more than ``max`` positions past ``ad``.

    Returns:
        The first position that triggers, or the position at which the scan
        gave up.
    """
    position = ad
    while True:
        triggered = safe_read(position, a) < 126 and safe_read(position + 10, a) < 130
        if triggered or position - ad > max:
            return position
        position += 1


def extract_cover_image(mp3_file):
    """Return the first embedded cover picture (APIC frame) of an MP3.

    Args:
        mp3_file: MP3 file to inspect; passed straight to ``MP3``.

    Returns:
        A PIL image opened from the first APIC frame's data, ``-1`` when the
        file has no tags at all, or ``None`` when it has tags but no APIC
        frame. Callers in this module distinguish the two failure values.
    """
    audio = MP3(mp3_file, ID3=ID3)
    # Identity test: a tag container must never be compared to None by value.
    if audio.tags is None:
        return -1
    for tag in audio.tags.values():
        if isinstance(tag, APIC):
            return Image.open(io.BytesIO(tag.data))
    print("No cover image found in the MP3 file.")
    return None


def getTitleAndArtist(mp3_file):
    """Read the TIT2 and TPE1 frames of an MP3.

    A frame that is absent is replaced by one built with empty text.

    Returns:
        ``(title, artist)``: the first text entry of each frame.
    """
    audio = MP3(mp3_file, ID3=ID3)
    title_frame = audio.get('TIT2', TIT2(encoding=3, text=''))
    title = title_frame.text[0]
    artist_frame = audio.get('TPE1', TPE1(encoding=3, text=''))
    artist = artist_frame.text[0]
    return title, artist


def getColour(img):
    """Return the dominant colour of ``img`` as reported by ColorThief.

    The image is written to a temporary PNG because ColorThief is given a
    file name here. The temporary file is removed even when saving or colour
    extraction raises.

    Args:
        img: Object with a PIL-style ``save(path, format=...)`` method.

    Returns:
        The value of ``ColorThief(...).get_color(quality=1)``.
    """
    # Only reserve a unique name; close the handle before anyone else opens it.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmpfile:
        tmp_path = tmpfile.name
    try:
        img.save(tmp_path, format="PNG")
        return ColorThief(tmp_path).get_color(quality=1)
    finally:
        os.remove(tmp_path)


def clamp(number):
    """Limit ``number`` to the closed interval [0, 1]."""
    capped = 1 if 1 < number else number
    return capped if capped > 0 else 0


def normalizeColour(C):
    """Derive the background colour from an RGB triple with 0-255 channels.

    Keeps the hue, multiplies saturation by 1.3 (clamped to [0, 1]) and
    forces the HSV value to 0.8.

    Returns:
        A 3-tuple of integer channels, each floored after scaling by 255.
    """
    red, green, blue = C[0] / 255, C[1] / 255, C[2] / 255
    hsv = colorsys.rgb_to_hsv(red, green, blue)
    adjusted = colorsys.hsv_to_rgb(hsv[0], clamp(1.3 * hsv[1]), .8)
    return tuple(math.floor(channel * 255) for channel in adjusted)


def normalizeColourBar(C):
    """Derive the bar colour from an RGB triple with 0-255 channels.

    Keeps the hue, multiplies saturation by 1.4 (clamped to [0, 1]) and
    forces the HSV value to 0.6.

    Returns:
        A 3-tuple of integer channels, each floored after scaling by 255.
    """
    red, green, blue = C[0] / 255, C[1] / 255, C[2] / 255
    hsv = colorsys.rgb_to_hsv(red, green, blue)
    adjusted = colorsys.hsv_to_rgb(hsv[0], clamp(1.4 * hsv[1]), .6)
    return tuple(math.floor(channel * 255) for channel in adjusted)


def stamp_text(draw, text, font, position, align='left'):
    """Draw ``text`` in white, vertically centred on ``position``.

    Args:
        draw: Object providing ``textbbox`` and ``text`` (an ImageDraw-style
            drawing context).
        text: String to draw.
        font: Font handed to both ``textbbox`` and ``text``.
        position: ``(x, y)`` anchor.
        align: ``'center'`` centres the text on x, ``'right'`` ends it at x;
            any other value starts it at x.
    """
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    text_width = right - left
    text_height = bottom - top

    anchor_x, anchor_y = position
    if align == 'center':
        anchor_x = anchor_x - text_width // 2
    elif align == 'right':
        anchor_x = anchor_x - text_width

    draw.text((anchor_x, anchor_y - text_height // 2), text, font=font, fill="#fff")


def linear_interpolate(start, stop, progress):
    """Return the point ``progress`` of the way from ``start`` to ``stop``."""
    span = stop - start
    return start + progress * span


def filecount(p):
    """Return the number of entries in directory ``p``.

    Args:
        p: Directory to list. It is passed to ``os.listdir`` so the count
            refers to that directory rather than the current working one.

    Returns:
        The number of names ``os.listdir(p)`` reports.
    """
    return len(os.listdir(p))


def render_frame(params):
    """Render one video frame and save it as a PNG.

    Args:
        params: 12-tuple ``(n, samples_array, cover_img, title, artist,
            dominant_color, width, height, fps, name, oscres, sr)`` as built
            by ``main``; ``n`` is the frame number.

    Returns:
        1, so the caller can count finished frames.

    Side effects:
        Writes ``path + 'out/{name}/{n}.png'``.
    """
    n, samples_array, cover_img, title, artist, dominant_color, width, height, fps, name, oscres, sr = params
    # NOTE: num_frames is computed here but not used anywhere below.
    num_frames = len(samples_array) // (sr // fps)
    # Canvas filled with the background colour derived from the cover.
    img = Image.new('RGB', (width, height), normalizeColour(dominant_color))
    d = ImageDraw.Draw(img)

    # Sample position at which frame n starts.
    s = math.floor((sr / fps) * n)
    # Waveform polyline around the trigger position found from s, converted
    # from centre-origin coordinates to top-left pixel coordinates.
    e = center_to_top_left(getRenderCords(samples_array, getTrigger(
        s, samples_array, max=oscres), res=oscres, size=(width, height)), width=width, height=height)
    d.line(e, fill='#fff', width=round(min(2*height/720, 2*width/1280)))

    # Square cover, side = half the smaller canvas dimension, horizontally
    # centred and placed 10% of the height from the top. It is pasted after
    # the waveform, so it covers any part of the line underneath it.
    cs = math.floor(min(width, height) / 2)
    cov = cover_img.resize((cs, cs))
    img.paste(cov, (((width // 2) - cs // 2), math.floor(height * .1)))

    # Font sizes are scaled by min(width, height) / 720.
    fontT = ImageFont.truetype(
        path+'Lexend-Bold.ttf', 50*(min(width, height)/720)//1)
    fontA = ImageFont.truetype(
        path+'Lexend-Bold.ttf', 40*(min(width, height)/720)//1)
    # NOTE: fontD is loaded but not used anywhere below.
    fontD = ImageFont.truetype(
        path+'SpaceMono-Bold.ttf', 30*(min(width, height)/720)//1)

    # Title and artist, both centred horizontally, given in centre-origin
    # coordinates (negative y is below the centre of the canvas).
    stamp_text(d, title, fontT, totopleft(
        (0, min(width, height) * .3 // -2), width=width, height=height), 'center')
    stamp_text(d, artist, fontA, totopleft(
        (0, min(width, height) * .44 // -2), width=width, height=height), 'center')

    # Progress bar track spanning 96% of the width, in the bar colour.
    d.line(center_to_top_left([(width * .96 // -2, height * .95 // -2), (width * .96 // 2, height * .95 // -2)], width=width, height=height),
           fill=normalizeColourBar(dominant_color), width=15 * height // 360)
    # White progress line: its right end is interpolated across 95% of the
    # width by s / len(samples_array).
    d.line(center_to_top_left([(width * .95 // -2, height * .95 // -2),
                               (linear_interpolate(width * .95 // -2, width * .95 // 2, s / len(samples_array)),
                                height * .95 // -2)], width=width, height=height), fill='#fff', width=10 * height // 360)

    img.save(path+f'out/{name}/{str(n)}.png', 'PNG')

    return 1  # Indicate one frame processed


def RenderVid(af, n, fps=30):
    """Mux the rendered frames of job ``n`` with an audio file via ffmpeg-python.

    Frames are read from ``path + 'out/{n}/%d.png'`` and the result is written
    to ``n + '.mp4'`` (H.264 video, AAC audio, yuv420p, stopping at the
    shorter input).

    Args:
        af: Audio file to use as the sound track.
        n: Job name; selects the frame directory and names the output.
        fps: Frame rate used for both reading the frames and the output.

    Returns:
        The output file name, ``n + '.mp4'``.
    """
    # Each input is its own stream; both are handed to a single output node.
    # (Chaining ``.input()`` on an existing stream is not part of the
    # ffmpeg-python API.)
    frames = ffmpeg.input(path+f'out/{n}/%d.png', framerate=fps)
    audio = ffmpeg.input(af)
    output_file = n + '.mp4'
    (ffmpeg
     .output(frames, audio, output_file, vcodec='libx264', r=fps,
             pix_fmt='yuv420p', acodec='aac', shortest=None)
     .run()
     )
    # NOTE(review): the file name is returned so a caller can offer the
    # download; gr.Interface does not appear to expose a ``download``
    # method -- confirm against the installed Gradio version.
    return output_file


# Zero-width characters removed by stripinvisibles():
# U+200B (zero width space) and U+FEFF (byte-order mark / zero width no-break space).
invisible_chars = ["\u200B", "\uFEFF"]


def remove_bom(data: str) -> str:
    """Return ``data`` without any leading U+FEFF characters."""
    BOM = '\ufeff'
    return data.lstrip(BOM)


def stripinvisibles(s):
    """Return ``s`` with every character listed in ``invisible_chars`` removed.

    ``str.replace`` returns a new string, so its result has to be assigned
    back; otherwise only the leading BOM handled by ``remove_bom`` would be
    stripped.
    """
    e = remove_bom(s)
    for i in invisible_chars:
        e = e.replace(i, "")
    return e

def start_progress(title):
    """Print ``title`` and an empty 60-column progress bar to stdout.

    Also resets the module-level ``progress_x`` to 0.
    """
    global progress_x
    empty_bar = '-' * 60
    out = sys.stdout
    out.write(f"{title}:\n")
    out.write(f"[{empty_bar}] 0%\r")
    out.flush()
    progress_x = 0

def progress(x):
    """Redraw the 60-column stdout progress bar for fraction ``x``.

    Also stores ``x`` in the module-level ``progress_x``.
    """
    global progress_x
    filled = round(60 * x)
    bar = '#' * filled + '-' * (60 - filled)
    sys.stdout.write(f"[{bar}] {x:.2%}\r")
    sys.stdout.flush()
    progress_x = x

def end_progress():
    """Print a completely filled progress bar and finish the line."""
    full_bar = '#' * 60
    sys.stdout.write(f"[{full_bar}] 100.00%\r\n")
    sys.stdout.flush()


# Whether the current run has a usable SRT subtitle file; main() resets it to
# False on entry and sets it to True once lyrics were written to out.srt.
haslyrics = False


def main(file, name, fps=30, res: tuple = (1280, 720), oscres=512, sr=11025, lyrics=None, img=None, tit=None, ast=None):
    """Render the visualisation video for an MP3 file.

    Steps: optionally convert/copy the lyrics to ``out.srt``, load and
    resample the audio, collect cover/title/artist, render one PNG per frame
    in a process pool, then call the ``ffmpeg`` executable to build the MP4.

    Args:
        file: Path of the MP3 file.
        name: Job name; frames go to ``path + 'out/{name}/'`` and the video
            to ``path + '{name}.mp4'``.
        fps: Frames per second of the video.
        res: ``(width, height)`` of the video.
        oscres: Number of waveform segments per frame (also the trigger
            search limit).
        sr: Sample rate the audio is resampled to.
        lyrics: Optional path of an LRC or SRT file.
        img: Optional path of a cover image overriding the embedded one.
        tit: Optional title overriding the ID3 title.
        ast: Optional artist overriding the ID3 artist.

    Returns:
        ``(video_path, haslyrics)``.

    Raises:
        gr.Error: When no cover image is available, when the MP3 has no tags
            and no metadata was supplied, when frame rendering fails, or when
            ffmpeg exits with a non-zero status.
    """
    global flag
    global haslyrics
    global iii
    p = gr.Progress()
    # Single source of truth for the subtitle file: written here and read by
    # ffmpeg below.
    srt_path = "out.srt"
    LRC2SRT.clear()
    if os.path.exists(srt_path):
        os.remove(srt_path)
    haslyrics = False
    if lyrics:
        p(0.5, "parsing lyrics")
        try:
            with open(lyrics, encoding="UTF8") as lyrics_in:
                sf = stripinvisibles(lyrics_in.read())
            # The SRT is written inside a ``with`` block so it is flushed and
            # closed before ffmpeg opens it.
            if sf[0] == '[':
                gr.Info("Lyrics of LRC type was detected, converting to SRT")
                LRC2SRT.convert_to_srt(sf)
                with open(srt_path, mode="x", encoding="UTF8") as outf:
                    outf.write('\n'.join(LRC2SRT.SRT))
                haslyrics = True
            elif sf[0].isdigit():
                with open(srt_path, mode="x", encoding="UTF8") as outf:
                    outf.write(sf)
                gr.Info("Lyrics of SRT type was detected")
                haslyrics = True
            else:
                gr.Warning("Lyrics file is invalid, skipping")
        except Exception:
            # Lyrics are optional: report the problem and continue without them.
            print(traceback.format_exc())
            gr.Warning(
                "Failed to parse lyrics, ensure there are no blank lines in between, you may use Lyrics Editor to ensure compatability")

    os.makedirs(path + f'out/{name}/', exist_ok=True)
    iii = 0
    # Load the audio file
    if flag:
        gr.Info("This is the first file since startup, this may take some time")
        flag = 0
    p(0.25, "loading file")
    audio_path = file
    y, sr = librosa.load(audio_path, sr=sr)  # Resample to the requested rate
    # Clip before the cast so samples at or above +1.0 do not wrap around
    # when converted to unsigned 8-bit.
    y_u8 = (y * 128 + 128).clip(0, 255).astype('uint8')
    samples_array = y_u8.tolist()
    p(0.5, "extracting metadata")
    # Cover image: an uploaded image wins over the embedded one.
    if img:
        cover_img = Image.open(img)
    else:
        cover_img = extract_cover_image(audio_path)
        tags_missing = cover_img == -1
        if tags_missing and not (tit or ast):
            raise gr.Error(
                "Mp3 is missing tags, add the info under the 'Metadata' section", duration=None)
        if cover_img is None or tags_missing:
            # Also covers an untagged MP3 for which only title/artist were
            # typed in: there is still no picture to draw.
            raise gr.Error(
                "Mp3 must have a cover image, upload the image under the 'Metadata' section", duration=None)

    # Title and artist: each user-supplied value overrides its ID3 counterpart.
    title, artist = getTitleAndArtist(audio_path)
    title = tit or title
    artist = ast or artist
    if title == '' or artist == '':
        gr.Warning('Missing Title or Artist')
    if img:
        dominant_color = ColorThief(img).get_color(quality=1)
    else:
        dominant_color = getColour(cover_img)

    # Frame rendering parameters
    width, height = res[0], res[1]
    num_frames = len(samples_array) // (sr // fps)

    # Prepare parameters for each frame
    params = [(n, samples_array, cover_img, title, artist, dominant_color,
               width, height, fps, name, oscres, sr) for n in range(num_frames)]
    print('-'*50)
    # Parenthesised so the 'Info:' prefix is printed in both cases.
    print('Info:' + ("External" if img else "ID3"))
    print("Title: " + title)
    print("Artist: " + artist)
    print(f'Resolution: {str(width)}x{str(height)}')
    print("Background Colour: " + str(dominant_color))
    print('Framerate: ' + str(fps))
    print('Frame Count: ' + str(num_frames))
    print('Segments per frame: ' + str(oscres))
    print('-'*50)
    try:
        # Leave one core free, but never ask for an empty pool.
        with Pool(max(1, cpu_count() - 1)) as pool:
            # imap_unordered yields as frames finish, which drives the progress bar.
            for _ in pool.imap_unordered(render_frame, params):
                iii += 1  # Increment frame count for progress
                p((iii, num_frames), desc="Rendering Frames")
    except Exception as e:
        print(traceback.format_exc())
        raise gr.Error("Something went wrong whilst rendering") from e

    p = gr.Progress()
    p(0, desc="Compiling video")
    print('-'*50)
    print('FFMPEG')
    output_path = path + f'{name}.mp4'
    ffmpeg_cmd = [
        "ffmpeg",
        '-framerate', str(fps),
        '-i', path + f'out/{name}/%d.png',  # Input PNG images
        '-i', file,  # Input MP3 audio
    ]
    if haslyrics:
        ffmpeg_cmd += ['-i', srt_path]  # Input SRT subtitles
    ffmpeg_cmd += [
        '-c:v', 'libx264',
        '-r', str(fps),
        '-pix_fmt', 'yuv420p',
        '-c:a', 'aac',
    ]
    if haslyrics:
        ffmpeg_cmd += ['-c:s', 'mov_text']  # Use mov_text codec for subtitles
    ffmpeg_cmd += ['-y', output_path]  # Overwrite an existing output file

    # Regular expression to match frame information
    frame_re = re.compile(r"frame=\s*(\d+)")
    p = gr.Progress()
    last_line = ''
    # stdout is discarded so an unread pipe can never block ffmpeg; progress
    # is parsed from stderr.
    with subprocess.Popen(ffmpeg_cmd, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE,
                          universal_newlines=True) as process:
        for output in process.stderr:
            last_line = output.strip() or last_line
            match = frame_re.search(output)
            if match:
                p((int(match.group(1)), num_frames), desc="Compiling Video")
        return_code = process.wait()
    if return_code != 0:
        print(last_line)
        raise gr.Error("FFmpeg failed to compile the video")
    print('-'*50)
    return output_path, haslyrics


def gradio_interface(audio_file, lyrics, output_name, fps=30, vidwidth=1280, vidheight=720, oscres=512, img=None, tit=None, ast=None):
    """Validate the form inputs, run ``main`` and clean up the frame directory.

    Args:
        audio_file: Uploaded audio file; must be present and end in ``.mp3``
            (case-insensitive).
        lyrics: Optional lyrics file passed to ``main``.
        output_name: Job/output name passed to ``main``.
        fps: Frames per second.
        vidwidth: Video width in pixels.
        vidheight: Video height in pixels.
        oscres: Number of waveform segments per frame.
        img: Optional cover image path.
        tit: Optional title override.
        ast: Optional artist override.

    Returns:
        ``(video_file, srt_output, haslyrics)`` where ``srt_output`` is
        ``"out.srt"`` when lyrics were used and ``None`` otherwise.

    Raises:
        gr.Error: When no audio file was given or it is not an MP3; errors
            raised by ``main`` propagate unchanged.
    """
    if audio_file is None:
        raise gr.Error("Please Provide an Audio File")
    if not str(audio_file).lower().endswith(".mp3"):
        raise gr.Error("Only MP3 Files are supported at this time")

    # Convert directly; this also accepts slider values delivered as floats.
    res = (int(vidwidth), int(vidheight))
    try:
        video_file, haslyrics = main(audio_file, output_name, fps=fps,
                                     res=res, oscres=oscres, lyrics=lyrics, img=img, tit=tit, ast=ast)
    finally:
        # Remove the rendered frames even when rendering failed; the
        # directory lives under the same ``path`` prefix main() writes to.
        shutil.rmtree(path + "out", ignore_errors=True)

    srt_output = "out.srt" if haslyrics else None
    return video_file, srt_output, haslyrics


def update_srt_output_visibility(haslyrics):
    """Build a Gradio update whose ``visible`` flag is ``haslyrics``."""
    visibility_update = gr.update(visible=haslyrics)
    return visibility_update


# Gradio UI: input accordions in the left column, the generated video and the
# SRT file in the right column, and one button that runs gradio_interface.
with gr.Blocks() as demo:
    gr.Markdown(
        'Upload an MP3 file and configure parameters to create a visualization video.')
    gr.Markdown(
        'Optionally upload a word or line synced lyric file in the advanced section.')

    with gr.Row():
        # Inputs on the left
        with gr.Column():
            with gr.Accordion(label="Audio Settings", open=True):
                gr.Markdown('## Load your mp3 file here')
                audio_file = gr.File(
                    label="Upload your MP3 file", file_count='single', file_types=['mp3'])

            # Optional overrides for cover art, title and artist.
            with gr.Accordion(label="Mp3 Metadata", open=False):
                gr.Markdown(
                    '## Add Metadata here if your mp3 does not have one')
                cover_img = gr.Image(label='Cover Art', type="filepath")
                title_input = gr.Textbox(label='Title')
                artist_input = gr.Textbox(label='Artists')

            with gr.Accordion(label="Video Output Settings", open=False):
                gr.Markdown('## Configure Video Output Here')
                output_name = gr.Textbox(
                    label="Output Video Name", value='Output')
                fps_slider = gr.Slider(
                    label="Frames per Second", minimum=20, maximum=60, step=1, value=30)
                vidwidth_slider = gr.Slider(
                    label="Output Video Width", minimum=100, maximum=2000, value=1280, step=2)
                vidheight_slider = gr.Slider(
                    label="Output Video Height", minimum=100, maximum=2000, value=720, step=2)

            with gr.Accordion(label="Advanced Options", open=False):
                # The slider starts at 1024, while gradio_interface's own
                # default for oscres is 512.
                oscres_slider = gr.Slider(
                    label="Number of Visualization Segments", minimum=256, maximum=2048, step=2, value=1024)
                gr.Markdown(
                    'If uploading LRC, ensure a blank timed line at the end to avoid conversion errors')
                lyrics_file = gr.File(label="(Optional) Upload Lyrics as LRC or SRT",
                                      file_count='single', file_types=['lrc', 'srt'])

            # Add a submit button
            submit_btn = gr.Button("Generate Video")

        # Outputs on the right
        with gr.Column():
            output_video = gr.Video(label="Output")
            with gr.Accordion(label="SRT File (Only used if lyrics is provided)", open=False):
                srt_output = gr.File(label="SRT Output")

    # Bind the button to the function. The inputs are listed in the order of
    # gradio_interface's positional parameters.
    # NOTE(review): gradio_interface returns three values (video, srt, haslyrics)
    # but only two output components are listed here -- confirm how the
    # installed Gradio version handles the extra value.
    submit_btn.click(
        fn=gradio_interface,
        inputs=[audio_file, lyrics_file, output_name, fps_slider, vidwidth_slider,
                vidheight_slider, oscres_slider, cover_img, title_input, artist_input],
        outputs=[output_video, srt_output]
    )

# Launch Gradio interface only when run as a script, not on import
if __name__ == '__main__':
    demo.launch()