Spaces:
Running
Running
File size: 15,727 Bytes
d7ff226 2fbfeb6 78dd807 717e2ff e50325c d7ff226 e50325c 93e5d9e 6979b00 d7ff226 e50325c db19dc1 2c9b6e2 8091650 db19dc1 e50325c d7ff226 e50325c d7ff226 e50325c d7ff226 e50325c d7ff226 e50325c d7ff226 e50325c db725c9 e50325c db725c9 d7ff226 e50325c d7ff226 71ce4d9 088fe02 d7ff226 e50325c d7ff226 3526004 d7ff226 e50325c d7ff226 e50325c d7ff226 e50325c d7ff226 e50325c d7ff226 e50325c d7ff226 e50325c d7ff226 e50325c e835e58 e50325c d7ff226 5e5dc03 d7ff226 570e595 e50325c d7ff226 e50325c d7ff226 e50325c d7ff226 e50325c d7ff226 25c18af d7ff226 e835e58 004ce63 e50325c d7ff226 e50325c d7ff226 004ce63 d7ff226 e50325c 7f8a1b2 e50325c dc8a999 e50325c 7f8a1b2 dc8a999 7f8a1b2 e50325c dc8a999 25c18af 717e2ff 12bad8b 717e2ff 12bad8b 717e2ff 12bad8b 717e2ff e50325c 5683224 25c18af e50325c e58a7cc 78dd807 e32c131 78dd807 5683224 e7c405a 78dd807 25c18af 78dd807 25c18af 7f8a1b2 d782c33 e7c405a d782c33 e7c405a 78dd807 dc8a999 e50325c 25c18af d7ff226 93e5d9e 25c18af d7ff226 5e5dc03 d7ff226 25c18af d7ff226 3526004 d7ff226 6499ba7 25c18af e50325c 13ad538 e50325c 6499ba7 3526004 6499ba7 2385693 d511aa1 2385693 d7ff226 5e5dc03 d7ff226 e50325c 12bad8b e50325c 7835f68 12bad8b c6dfd57 d7ff226 e50325c 5e5dc03 e835e58 25c18af c6dfd57 e50325c d7ff226 2385693 18d307c c6dfd57 e50325c d6584d2 e50325c 12bad8b d7ff226 e7c405a d782c33 106b5c7 d782c33 106b5c7 d782c33 106b5c7 25c18af d782c33 106b5c7 d782c33 e50325c d782c33 25c18af e50325c c6dfd57 e50325c c6dfd57 e50325c c6dfd57 12bad8b 25c18af e50325c 3526004 3947069 9335655 a561342 3947069 e835e58 004ce63 e50325c 3947069 2fbfeb6 9335655 25c18af 9335655 25c18af e50325c 9335655 25c18af d7ff226 e50325c ab914b1 e50325c 25c18af 9335655 8c33769 e50325c 797a642 e50325c 797a642 9335655 8c33769 e50325c 9335655 e50325c 9335655 ab914b1 9335655 797a642 3947069 ab914b1 e50325c 797a642 ab914b1 d7ff226 6ecdd65 e50325c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 |
import gradio as gr
import librosa
from PIL import Image, ImageDraw, ImageFont
from mutagen.mp3 import MP3
from mutagen.id3 import ID3, APIC, TIT2, TPE1
import io
from colorthief import ColorThief
import colorsys
import math
import os
from multiprocessing import Pool, cpu_count
import tempfile
import ffmpeg
import subprocess
import traceback
import shutil
import LRC2SRT
import sys
import re
# Truthy until main() has handled its first file; main() shows a one-time
# notice while this is set and then resets it to 0.
flag = 1
# Prefix prepended to font, frame-directory and output-video paths.
path = "" # Update with your path
def safe_read(i: int, a: list):
    """Return ``a[i]``, or 128 when ``i`` falls outside the list.

    Negative indices count as out of range rather than wrapping round to
    the end of the list, so a window that starts before the first sample
    reads the fallback value instead of samples from the end.

    Args:
        i: Index to read.
        a: Sequence of sample values.

    Returns:
        The element at ``i`` if ``0 <= i < len(a)``, otherwise 128.
    """
    if 0 <= i < len(a):
        return a[i]
    return 128
def getRenderCords(ta: list, idx: int, res: int = 1024, size: tuple = (1280, 720)) -> list:
    """Build the centre-origin polyline for a window of samples around ``idx``.

    The window covers indices ``idx - res // 2`` up to (not including)
    ``idx + res // 2``. X runs left to right across 90% of ``size[0]``;
    Y is the sample's offset from 128 scaled by ``size[1] / 2000`` and
    shifted by ``size[1] * .7 / -2``.

    Args:
        ta: Sample values, read through ``safe_read``.
        idx: Index the window is centred on.
        res: Number of segments in the window.
        size: ``(width, height)`` of the target frame.

    Returns:
        A list of ``(x, y)`` tuples.
    """
    half_window = res // 2
    y_scale = size[1] / 2000
    y_offset = size[1] * .7 / -2
    x = size[0] * .9 / -2
    points = []
    for sample_index in range(idx - half_window, idx + half_window):
        y = (safe_read(sample_index, ta) - 128) * y_scale + y_offset
        points.append((x, y))
        # Advance by one segment width; accumulated the same way each step.
        x += (size[0] * .9) / res
    return points
def center_to_top_left(coords, width=1280, height=720):
    """Convert every ``(x, y)`` pair in ``coords`` with ``totopleft``.

    Args:
        coords: Iterable of ``(x, y)`` pairs.
        width: Frame width passed to ``totopleft``.
        height: Frame height passed to ``totopleft``.

    Returns:
        A new list of converted coordinate tuples, in input order.
    """
    return [
        totopleft((x, y), width=width, height=height)
        for x, y in coords
    ]
def totopleft(coord, width=1280, height=720):
    """Map a centre-origin, y-up coordinate to a top-left-origin, y-down one.

    Args:
        coord: Pair whose first two items are x and y.
        width: Frame width.
        height: Frame height.

    Returns:
        ``(x + width / 2, height / 2 - y)``.
    """
    centre_x = coord[0]
    centre_y = coord[1]
    from_left = centre_x + width / 2
    from_top = height / 2 - centre_y
    return from_left, from_top
def getTrigger(ad: int, a: list, max: int = 1024) -> int:
    """Scan forward from ``ad`` for a trigger point in the samples.

    The scan stops at the first index where the sample is below 126 and
    the sample ten positions later is below 130, or as soon as the scan
    has moved more than ``max`` positions past ``ad``.

    Args:
        ad: Index to start scanning from.
        a: Sample values, read through ``safe_read``.
        max: Furthest distance to scan before giving up.

    Returns:
        The index at which the scan stopped.
    """
    pos = ad
    while True:
        if pos - ad > max:
            break
        if safe_read(pos, a) < 126 and safe_read(pos + 10, a) < 130:
            break
        pos += 1
    return pos
def extract_cover_image(mp3_file):
    """Return the first embedded cover image of an MP3 file.

    Args:
        mp3_file: Path (or file object) passed to ``mutagen.mp3.MP3``.

    Returns:
        * ``-1`` when the file has no tags at all,
        * ``None`` when tags exist but none is an ``APIC`` frame,
        * otherwise a PIL image opened from the first ``APIC`` frame.
    """
    audio = MP3(mp3_file, ID3=ID3)
    # Identity test: ``== None`` would go through the tag container's own
    # equality logic instead of checking for a missing value.
    if audio.tags is None:
        return -1
    for tag in audio.tags.values():
        if isinstance(tag, APIC):
            return Image.open(io.BytesIO(tag.data))
    print("No cover image found in the MP3 file.")
    return None
def getTitleAndArtist(mp3_file):
    """Read the title (``TIT2``) and artist (``TPE1``) of an MP3 file.

    A frame with empty text is substituted when a tag is absent.

    Args:
        mp3_file: Path (or file object) passed to ``mutagen.mp3.MP3``.

    Returns:
        ``(title, artist)`` taken from the first text entry of each frame.
    """
    audio = MP3(mp3_file, ID3=ID3)
    title_frame = audio.get('TIT2', TIT2(encoding=3, text=''))
    artist_frame = audio.get('TPE1', TPE1(encoding=3, text=''))
    return title_frame.text[0], artist_frame.text[0]
def getColour(img):
    """Return the dominant colour of a PIL image via ColorThief.

    The image is written to a temporary PNG because ColorThief is given a
    file name here. The temporary file is always removed, including when
    saving or colour extraction raises.

    Args:
        img: Image object exposing ``save(path, format=...)``.

    Returns:
        The value of ``ColorThief.get_color(quality=1)``.
    """
    # Only reserve a unique name; close the handle before reopening by name.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmpfile:
        tmp_name = tmpfile.name
    try:
        img.save(tmp_name, format="PNG")
        return ColorThief(tmp_name).get_color(quality=1)
    finally:
        os.remove(tmp_name)
def clamp(number):
    """Limit ``number`` to the closed interval [0, 1]."""
    capped = min(number, 1)
    return max(0, capped)
def normalizeColour(C):
    """Derive the background colour from an RGB triple.

    Keeps the hue, multiplies saturation by 1.3 (clamped to 1) and fixes
    the HSV value at 0.8.

    Args:
        C: Sequence whose first three items are 0-255 RGB channels.

    Returns:
        A 3-tuple of integer RGB channels.
    """
    red, green, blue = C[0] / 255, C[1] / 255, C[2] / 255
    hue, saturation, _ = colorsys.rgb_to_hsv(red, green, blue)
    adjusted = colorsys.hsv_to_rgb(hue, clamp(1.3 * saturation), .8)
    return tuple(math.floor(channel * 255) for channel in adjusted)
def normalizeColourBar(C):
    """Derive the progress-track colour from an RGB triple.

    Keeps the hue, multiplies saturation by 1.4 (clamped to 1) and fixes
    the HSV value at 0.6.

    Args:
        C: Sequence whose first three items are 0-255 RGB channels.

    Returns:
        A 3-tuple of integer RGB channels.
    """
    red, green, blue = C[0] / 255, C[1] / 255, C[2] / 255
    hue, saturation, _ = colorsys.rgb_to_hsv(red, green, blue)
    adjusted = colorsys.hsv_to_rgb(hue, clamp(1.4 * saturation), .6)
    return tuple(math.floor(channel * 255) for channel in adjusted)
def stamp_text(draw, text, font, position, align='left'):
    """Draw white ``text`` vertically centred on ``position``.

    Args:
        draw: Drawing object providing ``textbbox`` and ``text``.
        text: String to render.
        font: Font passed through to the drawing calls.
        position: ``(x, y)`` anchor point.
        align: ``'left'`` puts the anchor at the text's left edge,
            ``'center'`` at its horizontal middle, ``'right'`` at its
            right edge. Any other value behaves like ``'left'``.
    """
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    text_width = right - left
    text_height = bottom - top

    anchor_x, anchor_y = position
    anchor_y -= text_height // 2
    if align == 'center':
        anchor_x -= text_width // 2
    elif align == 'right':
        anchor_x -= text_width

    draw.text((anchor_x, anchor_y), text, font=font, fill="#fff")
def linear_interpolate(start, stop, progress):
    """Return the point ``progress`` of the way from ``start`` to ``stop``.

    ``progress`` of 0 gives ``start`` and 1 gives ``stop``; values outside
    that range extrapolate.
    """
    span = stop - start
    return start + progress * span
def filecount(p):
    """Return the number of entries in directory ``p``.

    Args:
        p: Directory to inspect. A falsy value (``None`` or ``""``) means
            the current working directory.

    Returns:
        The number of directory entries, as reported by ``os.listdir``.

    Raises:
        OSError: If ``p`` does not name a readable directory.
    """
    # Honour the argument; a falsy value falls back to the current directory.
    return len(os.listdir(p if p else "."))
def render_frame(params):
    """Render one video frame and save it as ``out/<name>/<n>.png``.

    Designed to be mapped over a process pool, hence the single tuple
    argument.

    Args:
        params: Tuple of ``(n, samples_array, cover_img, title, artist,
            dominant_color, width, height, fps, name, oscres, sr)`` where
            ``n`` is the frame number, ``samples_array`` the sample list,
            ``oscres`` the number of oscilloscope segments and ``sr`` the
            sample rate.

    Returns:
        ``1``, so callers can count processed frames.
    """
    (n, samples_array, cover_img, title, artist, dominant_color,
     width, height, fps, name, oscres, sr) = params
    short_side = min(width, height)

    img = Image.new('RGB', (width, height), normalizeColour(dominant_color))
    d = ImageDraw.Draw(img)

    # Oscilloscope trace, anchored at a trigger point at/after this frame's
    # first sample.
    s = math.floor((sr / fps) * n)
    trigger = getTrigger(s, samples_array, max=oscres)
    trace = center_to_top_left(
        getRenderCords(samples_array, trigger, res=oscres, size=(width, height)),
        width=width, height=height)
    d.line(trace, fill='#fff', width=round(min(2*height/720, 2*width/1280)))

    # Cover art: square of half the shorter side, centred horizontally.
    cs = math.floor(short_side / 2)
    cov = cover_img.resize((cs, cs))
    img.paste(cov, (((width // 2) - cs // 2), math.floor(height * .1)))

    # Font sizes are passed as ints: the floor-divided value is a float and
    # Pillow releases before 10.1 do not accept float sizes.
    fontT = ImageFont.truetype(
        path+'Lexend-Bold.ttf', int(50*(short_side/720)//1))
    fontA = ImageFont.truetype(
        path+'Lexend-Bold.ttf', int(40*(short_side/720)//1))
    stamp_text(d, title, fontT, totopleft(
        (0, short_side * .3 // -2), width=width, height=height), 'center')
    stamp_text(d, artist, fontA, totopleft(
        (0, short_side * .44 // -2), width=width, height=height), 'center')

    # Progress bar: a full-width track with the played portion drawn on top.
    bar_y = height * .95 // -2
    d.line(center_to_top_left([(width * .96 // -2, bar_y), (width * .96 // 2, bar_y)],
                              width=width, height=height),
           fill=normalizeColourBar(dominant_color), width=15 * height // 360)
    played_x = linear_interpolate(
        width * .95 // -2, width * .95 // 2, s / len(samples_array))
    d.line(center_to_top_left([(width * .95 // -2, bar_y), (played_x, bar_y)],
                              width=width, height=height),
           fill='#fff', width=10 * height // 360)

    img.save(path+f'out/{name}/{str(n)}.png', 'PNG')
    return 1  # Indicate one frame processed
def RenderVid(af, n, fps=30):
    """Encode the frames in ``out/<n>/`` plus audio ``af`` into ``<n>.mp4``.

    Uses the ffmpeg-python bindings. The two inputs are created separately
    and combined with ``ffmpeg.output``; stream objects do not offer a
    chained ``.input()`` method.

    Args:
        af: Path of the audio file to mux in.
        n: Job name; frames are read from ``out/<n>/%d.png`` and the video
            is written to ``<n>.mp4``.
        fps: Frame rate of the image sequence and of the output.

    Returns:
        The output file name.
    """
    frames = ffmpeg.input(path+f'out/{n}/%d.png', framerate=fps)
    audio = ffmpeg.input(af)
    output_name = n + '.mp4'
    (ffmpeg
     # ``shortest=None`` emits the bare ``-shortest`` flag.
     .output(frames, audio, output_name, vcodec='libx264', r=fps,
             pix_fmt='yuv420p', acodec='aac', shortest=None)
     .run()
     )
    return output_name
# Zero-width characters removed from lyric text before it is parsed.
invisible_chars = ["\u200B", "\uFEFF"]


def remove_bom(data: str) -> str:
    """Return ``data`` without any leading U+FEFF byte-order marks."""
    BOM = '\ufeff'
    return data.lstrip(BOM)


def stripinvisibles(s):
    """Return ``s`` with leading BOMs and all ``invisible_chars`` removed.

    Args:
        s: Text to clean.

    Returns:
        The cleaned string.
    """
    e = remove_bom(s)
    for i in invisible_chars:
        # str.replace returns a new string, so the result must be kept.
        e = e.replace(i, "")
    return e
def start_progress(title):
    """Print ``title`` and an empty 60-cell console progress bar.

    Also resets the module-level ``progress_x`` to 0.
    """
    global progress_x
    out = sys.stdout
    empty_bar = '-' * 60
    out.write(f"{title}:\n")
    out.write(f"[{empty_bar}] 0%\r")
    out.flush()
    progress_x = 0
def progress(x):
    """Redraw the console progress bar for fraction ``x`` (0 to 1).

    Stores ``x`` in the module-level ``progress_x``.
    """
    global progress_x
    filled = round(60 * x)
    bar = '#' * filled + '-' * (60 - filled)
    sys.stdout.write(f"[{bar}] {x:.2%}\r")
    sys.stdout.flush()
    progress_x = x
def end_progress():
    """Draw the console progress bar as complete and end the line."""
    full_bar = '#' * 60
    sys.stdout.write(f"[{full_bar}] 100.00%\r\n")
    sys.stdout.flush()
# Whether the most recent main() run produced a subtitle file; main()
# resets this to False at the start of every call.
haslyrics = False
def _write_subtitles(lyrics, srt_path, report):
    """Write the uploaded LRC/SRT lyrics to ``srt_path``; return True on success."""
    report(0.5, "parsing lyrics")
    try:
        with open(lyrics, encoding="UTF8") as lyric_file:
            sf = stripinvisibles(lyric_file.read())
        if sf[0] == '[':
            gr.Info("Lyrics of LRC type was detected, converting to SRT")
            LRC2SRT.convert_to_srt(sf)
            srt_text = '\n'.join(LRC2SRT.SRT)
        elif sf[0].isdigit():
            gr.Info("Lyrics of SRT type was detected")
            srt_text = sf
        else:
            gr.Warning("Lyrics file is invalid, skipping")
            return False
        # The context manager closes (and therefore flushes) the file before
        # ffmpeg is asked to read it.
        with open(srt_path, mode="w", encoding="UTF8") as outf:
            outf.write(srt_text)
        return True
    except Exception:
        # Best effort: a broken lyric file must not abort the render.
        print(traceback.format_exc())
        gr.Warning(
            "Failed to parse lyrics, ensure there are no blank lines in between, you may use Lyrics Editor to ensure compatability")
        return False


def _build_ffmpeg_cmd(file, name, fps, srt_path=None):
    """Return the ffmpeg argument list that muxes frames, audio and optional subtitles."""
    cmd = [
        "ffmpeg",
        '-framerate', str(fps),
        '-i', path + f'out/{name}/%d.png',  # Input PNG images
        '-i', str(file),  # Input MP3 audio
    ]
    if srt_path is not None:
        cmd += ['-i', srt_path]  # Input SRT subtitles
    cmd += [
        '-c:v', 'libx264',
        '-r', str(fps),
        '-pix_fmt', 'yuv420p',
        '-c:a', 'aac',
    ]
    if srt_path is not None:
        cmd += ['-c:s', 'mov_text']  # Use mov_text codec for subtitles
    cmd += ['-y', path + f'{name}.mp4']  # Output MP4 filename
    return cmd


def _run_ffmpeg(ffmpeg_cmd, num_frames):
    """Run ffmpeg, forwarding its frame counter to Gradio; raise gr.Error on failure."""
    # stdout is discarded: an unread pipe could fill up and block ffmpeg.
    process = subprocess.Popen(ffmpeg_cmd, stdout=subprocess.DEVNULL,
                               stderr=subprocess.PIPE, universal_newlines=True)
    # Regular expression to match frame information
    frame_re = re.compile(r"frame=\s*(\d+)")
    p = gr.Progress()
    tail = []  # last few stderr lines, printed if ffmpeg fails
    for output in process.stderr:
        tail.append(output)
        del tail[:-20]
        match = frame_re.search(output)
        if match:
            p((int(match.group(1)), num_frames), desc="Compiling Video")
    # Wait for the process to complete
    if process.wait() != 0:
        print(''.join(tail))
        raise gr.Error("FFmpeg failed whilst compiling the video")


def main(file, name, fps=30, res: tuple = (1280, 720), oscres=512, sr=11025, lyrics=None, img=None, tit=None, ast=None):
    """Render an oscilloscope visualisation video for an MP3 file.

    Steps: optional lyric conversion to ``out.srt``, audio loading and
    conversion to 8-bit samples, metadata/cover lookup, parallel frame
    rendering into ``out/<name>/``, then ffmpeg encoding to ``<name>.mp4``.

    Args:
        file: Path of the MP3 file.
        name: Job name used for the frame directory and the output video.
        fps: Output frame rate.
        res: ``(width, height)`` of the video.
        oscres: Number of oscilloscope segments per frame.
        sr: Sample rate the audio is resampled to.
        lyrics: Optional path of an LRC or SRT file.
        img: Optional path of a cover image that replaces the embedded one.
        tit: Optional title; used only when ``ast`` is also given.
        ast: Optional artist; used only when ``tit`` is also given.

    Returns:
        ``(video_filename, haslyrics)``.

    Raises:
        gr.Error: If no cover image is available, rendering fails, or
            ffmpeg exits with a non-zero status.
    """
    global flag, haslyrics, iii
    p = gr.Progress()
    LRC2SRT.clear()
    # The subtitle file is written to, and read by ffmpeg from, this one path.
    srt_path = "out.srt"
    if os.path.exists(srt_path):
        os.remove(srt_path)
    haslyrics = False
    if lyrics:
        haslyrics = _write_subtitles(lyrics, srt_path, p)
    os.makedirs(path + f'out/{name}/', exist_ok=True)
    iii = 0
    # Load the audio file
    if flag:
        gr.Info("This is the first file since startup, this may take some time")
        flag = 0
    p(0.25, "loading file")
    audio_path = file
    y, sr = librosa.load(audio_path, sr=sr)  # Resample to the requested rate
    # Clip before the cast so a full-scale sample (256) cannot wrap to 0.
    y_u8 = (y * 128 + 128).clip(0, 255).astype('uint8')
    samples_array = y_u8.tolist()
    p(0.5, "extracting metadata")
    # Extract cover image, title, and artist
    if img:
        cover_img = Image.open(img)
    else:
        cover_img = extract_cover_image(audio_path)
        if cover_img is None:
            raise gr.Error(
                "Mp3 must have a cover image, upload the image under the 'Metadata' section", duration=None)
        if cover_img == -1:
            # No tags at all means no embedded cover to draw either.
            raise gr.Error(
                "Mp3 is missing tags, add the info under the 'Metadata' section", duration=None)
    title, artist = getTitleAndArtist(audio_path)
    if tit and ast:
        title, artist = tit, ast
    if title == '' or artist == '':
        gr.Warning('Missing Title or Artist')
    if img:
        dominant_color = ColorThief(img).get_color(quality=1)
    else:
        dominant_color = getColour(cover_img)
    # Frame rendering parameters
    width, height = res[0], res[1]
    num_frames = len(samples_array) // (sr // fps)
    # Prepare parameters for each frame
    params = [(n, samples_array, cover_img, title, artist, dominant_color,
               width, height, fps, name, oscres, sr) for n in range(num_frames)]
    print('-'*50)
    # Parenthesised so the prefix is printed for both sources.
    print('Info: ' + ("External" if img else "ID3"))
    print("Title: " + title)
    print("Artist: " + artist)
    print(f'Resolution: {str(width)}x{str(height)}')
    print("Background Colour: " + str(dominant_color))
    print('Framerate: ' + str(fps))
    print('Frame Count: ' + str(num_frames))
    print('Segments per frame: ' + str(oscres))
    print('-'*50)
    try:
        # Leave one core free, but never ask for a zero-worker pool.
        with Pool(max(1, cpu_count() - 1)) as pool:
            # Use imap to get progress updates
            for _ in pool.imap_unordered(render_frame, params):
                iii += 1  # Increment frame count for progress
                p((iii, num_frames), desc="Rendering Frames")
    except Exception as e:
        print(traceback.format_exc())
        raise gr.Error("Something went wrong whilst rendering") from e
    p = gr.Progress()
    p(0, desc="Compiling video")
    print('-'*50)
    print('FFMPEG')
    ffmpeg_cmd = _build_ffmpeg_cmd(
        file, name, fps, srt_path if haslyrics else None)
    _run_ffmpeg(ffmpeg_cmd, num_frames)
    print('-'*50)
    return f"{name}.mp4", haslyrics
def gradio_interface(audio_file, lyrics, output_name, fps=30, vidwidth=1280, vidheight=720, oscres=512, img=None, tit=None, ast=None):
    """Validate the UI inputs, render the video and clean up the frames.

    Args:
        audio_file: Uploaded MP3 file (path or path-like).
        lyrics: Optional uploaded LRC/SRT file.
        output_name: Job name; the video is written to ``<output_name>.mp4``.
        fps: Frames per second.
        vidwidth: Video width in pixels.
        vidheight: Video height in pixels.
        oscres: Number of oscilloscope segments.
        img: Optional cover image path.
        tit: Optional title override.
        ast: Optional artist override.

    Returns:
        ``(video_file, srt_output, haslyrics)`` where ``srt_output`` is
        ``"out.srt"`` when subtitles were produced and ``None`` otherwise.

    Raises:
        gr.Error: If no audio file is given or it is not an MP3.
    """
    if audio_file is None:
        raise gr.Error("Please Provide an Audio File")
    # Accept upper-case extensions such as ``SONG.MP3`` too.
    if not str(audio_file).lower().endswith(".mp3"):
        raise gr.Error("Only MP3 Files are supported at this time")
    # Numeric inputs may arrive as floats; convert directly to int.
    res = (int(vidwidth), int(vidheight))
    try:
        video_file, haslyrics = main(audio_file, output_name, fps=int(fps),
                                     res=res, oscres=int(oscres), lyrics=lyrics, img=img, tit=tit, ast=ast)
    finally:
        # Clean up the rendered frames even when rendering failed.
        shutil.rmtree(path + "out", ignore_errors=True)
    srt_output = "out.srt" if haslyrics else None
    return video_file, srt_output, haslyrics
def update_srt_output_visibility(haslyrics):
    """Return a Gradio update whose ``visible`` flag equals ``haslyrics``."""
    visibility_update = gr.update(visible=haslyrics)
    return visibility_update
# Gradio UI: input controls, a submit button and the output components,
# wired to gradio_interface.
with gr.Blocks() as demo:
gr.Markdown(
'Upload an MP3 file and configure parameters to create a visualization video.')
gr.Markdown(
'Optionally upload a word or line synced lyric file in the advanced section.')
with gr.Row():
# Inputs on the left
with gr.Column():
with gr.Accordion(label="Audio Settings", open=True):
gr.Markdown('## Load your mp3 file here')
audio_file = gr.File(
label="Upload your MP3 file", file_count='single', file_types=['mp3'])
# Optional cover/title/artist; passed to gradio_interface as img, tit and ast.
with gr.Accordion(label="Mp3 Metadata", open=False):
gr.Markdown(
'## Add Metadata here if your mp3 does not have one')
cover_img = gr.Image(label='Cover Art', type="filepath")
title_input = gr.Textbox(label='Title')
artist_input = gr.Textbox(label='Artists')
with gr.Accordion(label="Video Output Settings", open=False):
gr.Markdown('## Configure Video Output Here')
output_name = gr.Textbox(
label="Output Video Name", value='Output')
fps_slider = gr.Slider(
label="Frames per Second", minimum=20, maximum=60, step=1, value=30)
# Width and height move in steps of 2 from an even minimum, so both stay even.
vidwidth_slider = gr.Slider(
label="Output Video Width", minimum=100, maximum=2000, value=1280, step=2)
vidheight_slider = gr.Slider(
label="Output Video Height", minimum=100, maximum=2000, value=720, step=2)
with gr.Accordion(label="Advanced Options", open=False):
# The slider starts at 1024, whereas gradio_interface's own default is 512.
oscres_slider = gr.Slider(
label="Number of Visualization Segments", minimum=256, maximum=2048, step=2, value=1024)
gr.Markdown(
'If uploading LRC, ensure a blank timed line at the end to avoid conversion errors')
lyrics_file = gr.File(label="(Optional) Upload Lyrics as LRC or SRT",
file_count='single', file_types=['lrc', 'srt'])
# Add a submit button
submit_btn = gr.Button("Generate Video")
# Outputs on the right
with gr.Column():
output_video = gr.Video(label="Output")
with gr.Accordion(label="SRT File (Only used if lyrics is provided)", open=False):
srt_output = gr.File(label="SRT Output")
# Bind the button to the function
# NOTE(review): gradio_interface returns three values but only two output
# components are listed here — confirm the third is meant to be dropped.
submit_btn.click(
fn=gradio_interface,
inputs=[audio_file, lyrics_file, output_name, fps_slider, vidwidth_slider,
vidheight_slider, oscres_slider, cover_img, title_input, artist_input],
outputs=[output_video, srt_output]
)
# Launch Gradio interface
if __name__ == '__main__':
demo.launch()
|