# (HF Spaces status banner — "Spaces: Running" — scrape residue, not code.)
import gradio as gr | |
import librosa | |
from PIL import Image, ImageDraw, ImageFont | |
from mutagen.mp3 import MP3 | |
from mutagen.id3 import ID3, APIC, TIT2, TPE1 | |
import io | |
from colorthief import ColorThief | |
import colorsys | |
import math | |
import os | |
from multiprocessing import Pool, cpu_count | |
import tempfile | |
import ffmpeg | |
import subprocess | |
import traceback | |
import shutil | |
import LRC2SRT | |
import sys | |
import re | |
# Module state:
flag = 1  # 1 until the first file has been processed since startup (reset in main)
path = ""  # Base directory prefix for fonts and output; "" = current dir. Update with your path
def safe_read(i: int, a: list):
    """Return a[i], or 128 (the unsigned-8-bit midpoint) when i runs past the end."""
    return a[i] if i < len(a) else 128
def getRenderCords(ta: list, idx: int, res: int = 1024, size: tuple = (1280, 720)) -> list:
    """Build the oscilloscope polyline: res points centred on sample idx.

    X spans 90% of the frame width; Y maps the 8-bit sample (centre 128)
    into a band offset towards the bottom of a centre-origin frame.
    """
    half = res // 2
    step = (size[0] * .9) / res          # horizontal advance per sample
    y_scale = size[1] / 2000             # amplitude scale
    y_off = size[1] * .7 / -2            # vertical band offset
    x = size[0] * .9 / -2                # left edge of the trace
    pts = []
    for j in range(idx - half, idx + half):
        pts.append((x, (safe_read(j, ta) - 128) * y_scale + y_off))
        x += step
    return pts
def center_to_top_left(coords, width=1280, height=720):
    """Map a sequence of centre-origin coords into top-left-origin image space."""
    return [totopleft(pt, width=width, height=height) for pt in coords]
def totopleft(coord, width=1280, height=720):
    """Convert one centre-origin (x, y) coord to top-left-origin (y grows down)."""
    x, y = coord
    return x + width / 2, height / 2 - y
def getTrigger(ad: int, a: list, max: int = 1024) -> int:
    """Scan forward from ad for a trigger point to stabilise the oscilloscope.

    A trigger is a sample below 126 whose +10 neighbour is below 130
    (a downward crossing of the 128 midline); gives up after max samples.
    (`max` shadows the builtin but is kept: callers pass it by keyword.)
    """
    i = ad
    while True:
        crossing = safe_read(i, a) < 126 and safe_read(i + 10, a) < 130
        if crossing or i - ad > max:
            return i
        i += 1
def extract_cover_image(mp3_file):
    """Return the embedded APIC cover art of an MP3 as a PIL Image.

    Returns -1 when the file has no ID3 tags at all, and None when tags
    exist but contain no APIC frame — callers distinguish the two sentinels.
    (Fix: `audio.tags == None` replaced with the `is None` identity idiom.)
    """
    audio = MP3(mp3_file, ID3=ID3)
    if audio.tags is None:
        # No ID3 header at all
        return -1
    for tag in audio.tags.values():
        if isinstance(tag, APIC):
            return Image.open(io.BytesIO(tag.data))
    print("No cover image found in the MP3 file.")
    return None
def getTitleAndArtist(mp3_file):
    """Return (title, artist) from the ID3 TIT2/TPE1 frames, '' when absent."""
    audio = MP3(mp3_file, ID3=ID3)

    def first_text(key, frame_cls):
        # .get falls back to an empty frame so .text[0] always exists
        return audio.get(key, frame_cls(encoding=3, text='')).text[0]

    return first_text('TIT2', TIT2), first_text('TPE1', TPE1)
def getColour(img):
    """Return the dominant (r, g, b) colour of a PIL image via ColorThief.

    ColorThief only accepts a file path, so the image is round-tripped
    through a temporary PNG.  Fixes vs. original: the handle is closed
    before ColorThief reopens the path (the original deleted/reopened it
    while the NamedTemporaryFile handle was still open, which fails on
    Windows), and the temp file is removed in a finally block so it is
    never leaked on error.
    """
    fd, tmp_path = tempfile.mkstemp(suffix=".png")
    os.close(fd)  # only the path is needed; PIL reopens it itself
    try:
        img.save(tmp_path, format="PNG")
        return ColorThief(tmp_path).get_color(quality=1)
    finally:
        os.remove(tmp_path)
def clamp(number):
    """Clamp number into the closed interval [0, 1]."""
    if number < 0:
        return 0
    if number > 1:
        return 1
    return number
def normalizeColour(C):
    """Background colour: boost saturation 1.3x (clamped to 1) and pin value to 0.8."""
    h, s, _v = colorsys.rgb_to_hsv(C[0] / 255, C[1] / 255, C[2] / 255)
    r, g, b = colorsys.hsv_to_rgb(h, max(0, min(1.3 * s, 1)), .8)
    return math.floor(r * 255), math.floor(g * 255), math.floor(b * 255)
def normalizeColourBar(C): | |
cc = colorsys.rgb_to_hsv(C[0] / 255, C[1] / 255, C[2] / 255) | |
ccc = colorsys.hsv_to_rgb(cc[0], clamp(1.4 * cc[1]), .6) | |
return math.floor(ccc[0] * 255), math.floor(ccc[1] * 255), math.floor(ccc[2] * 255) | |
def stamp_text(draw, text, font, position, align='left'):
    """Draw white text on a PIL ImageDraw, vertically centred on position[1].

    align is 'left' (default), 'center', or 'right' relative to position[0].
    """
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    w = right - left
    h = bottom - top
    x, y = position
    y -= h // 2  # vertical centring on the anchor point
    if align == 'center':
        x -= w // 2
    elif align == 'right':
        x -= w
    draw.text((x, y), text, font=font, fill="#fff")
def linear_interpolate(start, stop, progress):
    """Lerp: start at progress=0, stop at progress=1 (unclamped outside [0, 1])."""
    delta = stop - start
    return start + progress * delta
def filecount(p):
    """Return the number of directory entries in path p.

    Bug fix: the original called os.listdir() with no argument, so it
    ignored p and always counted the current working directory.
    """
    return len(os.listdir(p))
def render_frame(params):
    """Render one video frame to out/<name>/<n>.png.

    params is a 12-tuple (packed for multiprocessing.Pool): frame index n,
    8-bit sample list, cover PIL image, title, artist, dominant (r, g, b),
    width, height, fps, output name, oscilloscope resolution, sample rate.
    Returns 1 so the pool consumer can count completed frames.
    """
    n, samples_array, cover_img, title, artist, dominant_color, width, height, fps, name, oscres, sr = params
    num_frames = len(samples_array) // (sr // fps)  # NOTE(review): unused here
    # Background filled with the saturated/darkened dominant colour
    img = Image.new('RGB', (width, height), normalizeColour(dominant_color))
    d = ImageDraw.Draw(img)
    # First audio sample of this frame
    s = math.floor((sr / fps) * n)
    # Oscilloscope trace: find a trigger near s, build centred coords,
    # then map them into top-left image space
    e = center_to_top_left(getRenderCords(samples_array, getTrigger(
        s, samples_array, max=oscres), res=oscres, size=(width, height)), width=width, height=height)
    d.line(e, fill='#fff', width=round(min(2*height/720, 2*width/1280)))
    # Cover art: square, half the smaller dimension, horizontally centred
    cs = math.floor(min(width, height) / 2)
    cov = cover_img.resize((cs, cs))
    img.paste(cov, (((width // 2) - cs // 2), math.floor(height * .1)))
    # Font sizes scale with resolution; //1 truncates to a whole number
    fontT = ImageFont.truetype(
        path+'Lexend-Bold.ttf', 50*(min(width, height)/720)//1)
    fontA = ImageFont.truetype(
        path+'Lexend-Bold.ttf', 40*(min(width, height)/720)//1)
    # NOTE(review): fontD is never used below — dead assignment?
    fontD = ImageFont.truetype(
        path+'SpaceMono-Bold.ttf', 30*(min(width, height)/720)//1)
    # Title and artist text, horizontally centred
    stamp_text(d, title, fontT, totopleft(
        (0, min(width, height) * .3 // -2), width=width, height=height), 'center')
    stamp_text(d, artist, fontA, totopleft(
        (0, min(width, height) * .44 // -2), width=width, height=height), 'center')
    # Progress bar: full-width track in the bar colour...
    d.line(center_to_top_left([(width * .96 // -2, height * .95 // -2), (width * .96 // 2, height * .95 // -2)], width=width, height=height),
           fill=normalizeColourBar(dominant_color), width=15 * height // 360)
    # ...and a white fill proportional to playback position (s / total samples)
    d.line(center_to_top_left([(width * .95 // -2, height * .95 // -2),
                               (linear_interpolate(width * .95 // -2, width * .95 // 2, s / len(samples_array)),
                                height * .95 // -2)], width=width, height=height), fill='#fff', width=10 * height // 360)
    img.save(path+f'out/{name}/{str(n)}.png', 'PNG')
    return 1  # Indicate one frame processed
def RenderVid(af, n, fps=30):
    """Mux the rendered PNG frames with audio file af into n.mp4 via ffmpeg-python.

    NOTE(review): appears unused — main() builds its own ffmpeg command line.
    """
    # NOTE(review): chaining .input() twice on a single ffmpeg-python stream
    # does not attach a second input the way separate ffmpeg.input() calls
    # merged via ffmpeg.output(a, b, ...) would — verify against the
    # ffmpeg-python documentation before relying on this function.
    (ffmpeg
     .input(path+f'out/{n}/%d.png', framerate=fps)
     .input(af)
     .output(n + '.mp4', vcodec='libx264', r=fps, pix_fmt='yuv420p', acodec='aac', shortest=None)
     .run()
     )
    # NOTE(review): gr.Interface.download is not a documented Gradio API — confirm.
    gr.Interface.download(f"{n}.mp4")
invisible_chars = ["\u200B", "\uFEFF"] | |
def remove_bom(data: str) -> str:
    """Strip any leading UTF-8 byte-order marks from data."""
    return data.lstrip('\ufeff')
def stripinvisibles(s):
    """Remove the leading BOM and all zero-width characters from s.

    Bug fix: str.replace returns a new string — the original discarded the
    result (`e.replace(i, "")` on its own line), so invisible characters
    were never actually removed.
    """
    e = remove_bom(s)
    for ch in invisible_chars:
        e = e.replace(ch, "")  # rebind: strings are immutable
    return e
def start_progress(title):
    """Print the header and an empty 60-char console progress bar; reset progress_x."""
    global progress_x
    progress_x = 0
    sys.stdout.write(f"{title}:\n[{'-' * 60}] 0%\r")
    sys.stdout.flush()
def progress(x):
    """Redraw the 60-char console progress bar at fraction x (0..1)."""
    global progress_x
    filled = round(60 * x)
    sys.stdout.write(f"[{'#' * filled}{'-' * (60 - filled)}] {x:.2%}\r")
    sys.stdout.flush()
    progress_x = x
def end_progress():
    """Fill the console progress bar to 100% and move to the next line."""
    full_bar = '#' * 60
    sys.stdout.write(f"[{full_bar}] 100.00%\r\n")
    sys.stdout.flush()
haslyrics = False | |
def main(file, name, fps=30, res: tuple = (1280, 720), oscres=512, sr=11025, lyrics=None, img=None, tit=None, ast=None):
    """Render an MP3 into a visualization video.

    Pipeline: optionally convert lyrics (LRC or SRT) to out.srt; load and
    resample the audio; gather cover art, title and artist from the ID3
    tags (or the explicit img/tit/ast overrides); render one PNG per frame
    in a process pool; then mux frames + audio (+ subtitles when present)
    with ffmpeg.

    Returns (output_mp4_name, haslyrics).
    """
    global flag
    p = gr.Progress()
    LRC2SRT.clear()
    # Remove any stale subtitle file from a previous run
    if os.path.exists("out.srt"):
        os.remove("out.srt")
    global haslyrics
    haslyrics = False
    if lyrics:
        p(0.5, "parsing lyrics")
        try:
            # 'x' mode fails if out.srt somehow still exists; both handles
            # are now closed deterministically (the original leaked them).
            with open("out.srt", mode="x", encoding="UTF8") as outf, \
                    open(lyrics, encoding="UTF8") as lyr:
                sf = stripinvisibles(lyr.read())
                if sf[0] == '[':
                    gr.Info("Lyrics of LRC type was detected, converting to SRT")
                    LRC2SRT.convert_to_srt(sf)
                    outf.write('\n'.join(LRC2SRT.SRT))
                    haslyrics = True
                elif sf[0].isdigit():
                    # SRT files start with a numeric cue index
                    outf.write(sf)
                    gr.Info("Lyrics of SRT type was detected")
                    haslyrics = True
                else:
                    gr.Warning("Lyrics file is invalid, skipping")
        except Exception:
            print(traceback.format_exc())
            gr.Warning(
                "Failed to parse lyrics, ensure there are no blank lines in between, you may use Lyrics Editor to ensure compatability")
    os.makedirs(path + f'out/{name}/', exist_ok=True)
    global iii
    iii = 0  # frame counter shared with the progress loop below
    # Load the audio file
    if flag:
        gr.Info("This is the first file since startup, this may take some time")
        flag = 0
    p(0.25, "loading file")
    audio_path = file
    y, sr = librosa.load(audio_path, sr=sr)  # Resample to 11025 Hz
    # Map [-1.0, 1.0] float samples onto unsigned 8-bit (centre = 128)
    y_u8 = (y * 128 + 128).astype('uint8')
    samples_array = y_u8.tolist()
    p(0.5, "extracting metadata")
    # Extract cover image, title, and artist
    cover_file = None
    if img:
        cover_file = Image.open(img)
    cover_img = extract_cover_image(audio_path)
    if img:
        cover_img = cover_file  # an uploaded image overrides embedded art
    if cover_img is None:
        raise gr.Error(
            "Mp3 must have a cover image, upload the image under the 'Metadata' section", duration=None)
    elif cover_img == -1 and not (tit or ast or img):
        # -1 sentinel from extract_cover_image: the MP3 has no ID3 tags at all
        raise gr.Error(
            "Mp3 is missing tags, add the info under the 'Metadata' section", duration=None)
    title, artist = getTitleAndArtist(audio_path)
    if tit and ast:
        title, artist = tit, ast
    if title == '' or artist == '':
        gr.Warning('Missing Title or Artist')
    if img:
        color_thief = ColorThief(img)
        dominant_color = color_thief.get_color(quality=1)
        cover_img = cover_file
    else:
        dominant_color = getColour(cover_img)
    # Frame rendering parameters
    width, height, fps = res[0], res[1], fps
    num_frames = len(samples_array) // (sr // fps)
    # Prepare parameters for each frame (picklable tuples for the pool)
    params = [(n, samples_array, cover_img, title, artist, dominant_color,
               width, height, fps, name, oscres, sr) for n in range(num_frames)]
    print('-'*50)
    # Bug fix: the conditional must be parenthesized — without the parens,
    # precedence made this print either 'Info:External' or just 'ID3'.
    print('Info:' + ("External" if img else "ID3"))
    print("Title: " + title)
    print("Artist: " + artist)
    print(f'Resolution: {str(width)}x{str(height)}')
    print("Background Colour: " + str(dominant_color))
    print('Framerate: ' + str(fps))
    print('Frame Count: ' + str(num_frames))
    print('Segments per frame: ' + str(oscres))
    print('-'*50)
    try:
        # Leave one core free so the UI stays responsive
        with Pool(cpu_count()-1) as pool:
            num_frames = len(samples_array) // (sr // fps)
            # Use imap to get progress updates as each frame completes
            for _ in pool.imap_unordered(render_frame, params):
                iii += 1  # Increment frame count for progress
                p((iii, num_frames), desc="Rendering Frames")
    except Exception:
        raise gr.Error("Something went wrong whilst rendering")
    p = gr.Progress()
    p(0, desc="Compiling video")
    print('-'*50)
    print('FFMPEG')
    if haslyrics:
        ffmpeg_cmd = [
            "ffmpeg",
            '-framerate', str(fps),
            '-i', path + f'out/{name}/%d.png',  # Input PNG images
            '-i', file,                          # Input MP3 audio
            '-i', path + 'out.srt',              # Input SRT subtitles
            '-c:v', 'libx264',
            '-r', str(fps),
            '-pix_fmt', 'yuv420p',
            '-c:a', 'aac',
            '-c:s', 'mov_text',                  # Use mov_text codec for subtitles
            '-y',
            path + f'{name}.mp4'                 # Output MP4 filename
        ]
    else:
        ffmpeg_cmd = [
            "ffmpeg",
            '-framerate', str(fps),
            '-i', path + f'out/{name}/%d.png',  # Input PNG images
            '-i', f'{file}',                     # Input MP3 audio
            '-c:v', 'libx264',
            '-r', str(fps),
            '-pix_fmt', 'yuv420p',
            '-c:a', 'aac',
            '-y',
            path + f'{name}.mp4'                 # Output MP4 filename
        ]
    process = subprocess.Popen(ffmpeg_cmd, stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE, universal_newlines=True)
    # ffmpeg reports progress on stderr; parse the running "frame=N" counter
    frame_re = re.compile(r"frame=\s*(\d+)")
    p = gr.Progress()
    while True:
        output = process.stderr.readline()
        if output == '' and process.poll() is not None:
            break
        if output:
            # Check if the output line contains frame information
            match = frame_re.search(output)
            if match:
                p((int(match.group(1)), num_frames), desc="Compiling Video")
    # Wait for the process to complete
    process.wait()
    print('-'*50)
    return f"{name}.mp4", haslyrics
def gradio_interface(audio_file, lyrics, output_name, fps=30, vidwidth=1280, vidheight=720, oscres=512, img=None, tit=None, ast=None):
    """Validate the upload, run main(), then clean up the frame directory.

    Returns (video_path, srt_path_or_None, haslyrics).
    """
    # Guard clauses: reject missing or non-MP3 uploads up front
    if audio_file is None:
        raise gr.Error("Please Provide an Audio File")
    if not str(audio_file).endswith(".mp3"):
        raise gr.Error("Only MP3 Files are supported at this time")
    dims = f"{vidwidth}x{vidheight}".split('x')
    video_file, haslyrics = main(
        audio_file, output_name, fps=fps, res=(int(dims[0]), int(dims[1])),
        oscres=oscres, lyrics=lyrics, img=img, tit=tit, ast=ast)
    # Remove the per-frame PNG directory now that the video is muxed
    shutil.rmtree("out")
    return video_file, ("out.srt" if haslyrics else None), haslyrics
def update_srt_output_visibility(haslyrics):
    """Return a Gradio component update toggling visibility on haslyrics."""
    # NOTE(review): not wired to any event in the visible code — confirm usage.
    return gr.update(visible=haslyrics)
# --- Gradio UI definition -------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown(
        'Upload an MP3 file and configure parameters to create a visualization video.')
    gr.Markdown(
        'Optionally upload a word or line synced lyric file in the advanced section.')
    with gr.Row():
        # Inputs on the left
        with gr.Column():
            with gr.Accordion(label="Audio Settings", open=True):
                gr.Markdown('## Load your mp3 file here')
                audio_file = gr.File(
                    label="Upload your MP3 file", file_count='single', file_types=['mp3'])
            with gr.Accordion(label="Mp3 Metadata", open=False):
                gr.Markdown(
                    '## Add Metadata here if your mp3 does not have one')
                cover_img = gr.Image(label='Cover Art', type="filepath")
                title_input = gr.Textbox(label='Title')
                artist_input = gr.Textbox(label='Artists')
            with gr.Accordion(label="Video Output Settings", open=False):
                gr.Markdown('## Configure Video Output Here')
                output_name = gr.Textbox(
                    label="Output Video Name", value='Output')
                fps_slider = gr.Slider(
                    label="Frames per Second", minimum=20, maximum=60, step=1, value=30)
                vidwidth_slider = gr.Slider(
                    label="Output Video Width", minimum=100, maximum=2000, value=1280, step=2)
                vidheight_slider = gr.Slider(
                    label="Output Video Height", minimum=100, maximum=2000, value=720, step=2)
            with gr.Accordion(label="Advanced Options", open=False):
                oscres_slider = gr.Slider(
                    label="Number of Visualization Segments", minimum=256, maximum=2048, step=2, value=1024)
                gr.Markdown(
                    'If uploading LRC, ensure a blank timed line at the end to avoid conversion errors')
                lyrics_file = gr.File(label="(Optional) Upload Lyrics as LRC or SRT",
                                      file_count='single', file_types=['lrc', 'srt'])
            # Add a submit button
            submit_btn = gr.Button("Generate Video")
        # Outputs on the right
        with gr.Column():
            output_video = gr.Video(label="Output")
            with gr.Accordion(label="SRT File (Only used if lyrics is provided)", open=False):
                srt_output = gr.File(label="SRT Output")
    # Bind the button to the function
    # NOTE(review): gradio_interface returns 3 values (video, srt, haslyrics)
    # but only 2 outputs are bound here — confirm Gradio tolerates the extra
    # return value or bind a third output component.
    submit_btn.click(
        fn=gradio_interface,
        inputs=[audio_file, lyrics_file, output_name, fps_slider, vidwidth_slider,
                vidheight_slider, oscres_slider, cover_img, title_input, artist_input],
        outputs=[output_video, srt_output]
    )
# Launch the Gradio interface only when run as a script (not on import).
if __name__ == '__main__':
    demo.launch()