# (HF Spaces status banner — "Spaces: Running" — scrape residue, not code.)
import gradio as gr | |
import librosa | |
from PIL import Image, ImageDraw, ImageFont | |
from mutagen.mp3 import MP3 | |
from mutagen.id3 import ID3, APIC, TIT2, TPE1 | |
import io | |
from colorthief import ColorThief | |
import colorsys | |
import math | |
import os | |
from multiprocessing import Pool, cpu_count | |
import tempfile | |
import ffmpeg | |
import subprocess | |
import traceback | |
import shutil | |
import LRC2SRT | |
import sys | |
import re | |
# Module state:
flag = 1  # 1 until the first file has been processed since startup (reset in main)
path = ""  # Base directory prefix for fonts and output; "" = current dir. Update with your path
def safe_read(i: int, a: list):
    """Return a[i], or 128 (the unsigned-8-bit midpoint) when i runs past the end."""
    return a[i] if i < len(a) else 128
def getRenderCords(ta: list, idx: int, res: int = 1024, size: tuple = (1280, 720)) -> list:
    """Build the oscilloscope polyline: res points centred on sample idx.

    X spans 90% of the frame width; Y maps the 8-bit sample (centre 128)
    into a band offset towards the bottom of a centre-origin frame.
    """
    half = res // 2
    step = (size[0] * .9) / res          # horizontal advance per sample
    y_scale = size[1] / 2000             # amplitude scale
    y_off = size[1] * .7 / -2            # vertical band offset
    x = size[0] * .9 / -2                # left edge of the trace
    pts = []
    for j in range(idx - half, idx + half):
        pts.append((x, (safe_read(j, ta) - 128) * y_scale + y_off))
        x += step
    return pts
def center_to_top_left(coords, width=1280, height=720):
    """Map a sequence of centre-origin coords into top-left-origin image space."""
    return [totopleft(pt, width=width, height=height) for pt in coords]
def totopleft(coord, width=1280, height=720):
    """Convert one centre-origin (x, y) coord to top-left-origin (y grows down)."""
    x, y = coord
    return x + width / 2, height / 2 - y
def getTrigger(ad: int, a: list, max: int = 1024) -> int:
    """Scan forward from ad for a trigger point to stabilise the oscilloscope.

    A trigger is a sample below 126 whose +10 neighbour is below 130
    (a downward crossing of the 128 midline); gives up after max samples.
    (`max` shadows the builtin but is kept: callers pass it by keyword.)
    """
    i = ad
    while True:
        crossing = safe_read(i, a) < 126 and safe_read(i + 10, a) < 130
        if crossing or i - ad > max:
            return i
        i += 1
def extract_cover_image(mp3_file):
    """Return the embedded APIC cover art of an MP3 as a PIL Image.

    Returns -1 when the file has no ID3 tags at all, and None when tags
    exist but contain no APIC frame — callers distinguish the two sentinels.
    (Fix: `audio.tags == None` replaced with the `is None` identity idiom.)
    """
    audio = MP3(mp3_file, ID3=ID3)
    if audio.tags is None:
        # No ID3 header at all
        return -1
    for tag in audio.tags.values():
        if isinstance(tag, APIC):
            return Image.open(io.BytesIO(tag.data))
    print("No cover image found in the MP3 file.")
    return None
def getTitleAndArtist(mp3_file):
    """Return (title, artist) from the ID3 TIT2/TPE1 frames, '' when absent."""
    audio = MP3(mp3_file, ID3=ID3)

    def first_text(key, frame_cls):
        # .get falls back to an empty frame so .text[0] always exists
        return audio.get(key, frame_cls(encoding=3, text='')).text[0]

    return first_text('TIT2', TIT2), first_text('TPE1', TPE1)
def getColour(img):
    """Return the dominant (r, g, b) colour of a PIL image via ColorThief.

    ColorThief only accepts a file path, so the image is round-tripped
    through a temporary PNG.  Fixes vs. original: the handle is closed
    before ColorThief reopens the path (the original deleted/reopened it
    while the NamedTemporaryFile handle was still open, which fails on
    Windows), and the temp file is removed in a finally block so it is
    never leaked on error.
    """
    fd, tmp_path = tempfile.mkstemp(suffix=".png")
    os.close(fd)  # only the path is needed; PIL reopens it itself
    try:
        img.save(tmp_path, format="PNG")
        return ColorThief(tmp_path).get_color(quality=1)
    finally:
        os.remove(tmp_path)
def clamp(number):
    """Clamp number into the closed interval [0, 1]."""
    if number < 0:
        return 0
    if number > 1:
        return 1
    return number
def normalizeColour(C):
    """Background colour: boost saturation 1.3x (clamped to 1) and pin value to 0.8."""
    h, s, _v = colorsys.rgb_to_hsv(C[0] / 255, C[1] / 255, C[2] / 255)
    r, g, b = colorsys.hsv_to_rgb(h, max(0, min(1.3 * s, 1)), .8)
    return math.floor(r * 255), math.floor(g * 255), math.floor(b * 255)
def normalizeColourBar(C): | |
cc = colorsys.rgb_to_hsv(C[0] / 255, C[1] / 255, C[2] / 255) | |
ccc = colorsys.hsv_to_rgb(cc[0], clamp(1.4 * cc[1]), .6) | |
return math.floor(ccc[0] * 255), math.floor(ccc[1] * 255), math.floor(ccc[2] * 255) | |
def stamp_text(draw, text, font, position, align='left'):
    """Draw white text on a PIL ImageDraw, vertically centred on position[1].

    align is 'left' (default), 'center', or 'right' relative to position[0].
    """
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    w = right - left
    h = bottom - top
    x, y = position
    y -= h // 2  # vertical centring on the anchor point
    if align == 'center':
        x -= w // 2
    elif align == 'right':
        x -= w
    draw.text((x, y), text, font=font, fill="#fff")
def linear_interpolate(start, stop, progress):
    """Lerp: start at progress=0, stop at progress=1 (unclamped outside [0, 1])."""
    delta = stop - start
    return start + progress * delta
def filecount(p):
    """Return the number of directory entries in path p.

    Bug fix: the original called os.listdir() with no argument, so it
    ignored p and always counted the current working directory.
    """
    return len(os.listdir(p))
def render_frame(params):
    """Render one video frame to out/<name>/<n>.png.

    params is a 12-tuple (packed for multiprocessing.Pool): frame index n,
    8-bit sample list, cover PIL image, title, artist, dominant (r, g, b),
    width, height, fps, output name, oscilloscope resolution, sample rate.
    Returns 1 so the pool consumer can count completed frames.
    """
    n, samples_array, cover_img, title, artist, dominant_color, width, height, fps, name, oscres, sr = params
    num_frames = len(samples_array) // (sr // fps)  # NOTE(review): unused here
    # Background filled with the saturated/darkened dominant colour
    img = Image.new('RGB', (width, height), normalizeColour(dominant_color))
    d = ImageDraw.Draw(img)
    # First audio sample of this frame
    s = math.floor((sr / fps) * n)
    # Oscilloscope trace: find a trigger near s, build centred coords,
    # then map them into top-left image space
    e = center_to_top_left(getRenderCords(samples_array, getTrigger(
        s, samples_array, max=oscres), res=oscres, size=(width, height)), width=width, height=height)
    d.line(e, fill='#fff', width=round(min(2*height/720, 2*width/1280)))
    # Cover art: square, half the smaller dimension, horizontally centred
    cs = math.floor(min(width, height) / 2)
    cov = cover_img.resize((cs, cs))
    img.paste(cov, (((width // 2) - cs // 2), math.floor(height * .1)))
    # Font sizes scale with resolution; //1 truncates to a whole number
    fontT = ImageFont.truetype(
        path+'Lexend-Bold.ttf', 50*(min(width, height)/720)//1)
    fontA = ImageFont.truetype(
        path+'Lexend-Bold.ttf', 40*(min(width, height)/720)//1)
    # NOTE(review): fontD is never used below — dead assignment?
    fontD = ImageFont.truetype(
        path+'SpaceMono-Bold.ttf', 30*(min(width, height)/720)//1)
    # Title and artist text, horizontally centred
    stamp_text(d, title, fontT, totopleft(
        (0, min(width, height) * .3 // -2), width=width, height=height), 'center')
    stamp_text(d, artist, fontA, totopleft(
        (0, min(width, height) * .44 // -2), width=width, height=height), 'center')
    # Progress bar: full-width track in the bar colour...
    d.line(center_to_top_left([(width * .96 // -2, height * .95 // -2), (width * .96 // 2, height * .95 // -2)], width=width, height=height),
           fill=normalizeColourBar(dominant_color), width=15 * height // 360)
    # ...and a white fill proportional to playback position (s / total samples)
    d.line(center_to_top_left([(width * .95 // -2, height * .95 // -2),
                               (linear_interpolate(width * .95 // -2, width * .95 // 2, s / len(samples_array)),
                                height * .95 // -2)], width=width, height=height), fill='#fff', width=10 * height // 360)
    img.save(path+f'out/{name}/{str(n)}.png', 'PNG')
    return 1  # Indicate one frame processed
def RenderVid(af, n, fps=30):
    """Mux the rendered PNG frames with audio file af into n.mp4 via ffmpeg-python.

    NOTE(review): appears unused — main() builds its own ffmpeg command line.
    """
    # NOTE(review): chaining .input() twice on a single ffmpeg-python stream
    # does not attach a second input the way separate ffmpeg.input() calls
    # merged via ffmpeg.output(a, b, ...) would — verify against the
    # ffmpeg-python documentation before relying on this function.
    (ffmpeg
     .input(path+f'out/{n}/%d.png', framerate=fps)
     .input(af)
     .output(n + '.mp4', vcodec='libx264', r=fps, pix_fmt='yuv420p', acodec='aac', shortest=None)
     .run()
     )
    # NOTE(review): gr.Interface.download is not a documented Gradio API — confirm.
    gr.Interface.download(f"{n}.mp4")
invisible_chars = ["\u200B", "\uFEFF"] | |
def remove_bom(data: str) -> str:
    """Strip any leading UTF-8 byte-order marks from data."""
    return data.lstrip('\ufeff')
def stripinvisibles(s):
    """Remove the leading BOM and all zero-width characters from s.

    Bug fix: str.replace returns a new string — the original discarded the
    result (`e.replace(i, "")` on its own line), so invisible characters
    were never actually removed.
    """
    e = remove_bom(s)
    for ch in invisible_chars:
        e = e.replace(ch, "")  # rebind: strings are immutable
    return e
def start_progress(title):
    """Print the header and an empty 60-char console progress bar; reset progress_x."""
    global progress_x
    progress_x = 0
    sys.stdout.write(f"{title}:\n[{'-' * 60}] 0%\r")
    sys.stdout.flush()
def progress(x):
    """Redraw the 60-char console progress bar at fraction x (0..1)."""
    global progress_x
    filled = round(60 * x)
    sys.stdout.write(f"[{'#' * filled}{'-' * (60 - filled)}] {x:.2%}\r")
    sys.stdout.flush()
    progress_x = x
def end_progress():
    """Fill the console progress bar to 100% and move to the next line."""
    full_bar = '#' * 60
    sys.stdout.write(f"[{full_bar}] 100.00%\r\n")
    sys.stdout.flush()
haslyrics = False | |
def main(file, name, fps=30, res: tuple = (1280, 720), oscres=512, sr=11025, lyrics=None, img=None, tit=None, ast=None):
    """Render an MP3 into a visualization video.

    Pipeline: optionally convert lyrics (LRC or SRT) to out.srt; load and
    resample the audio; gather cover art, title and artist from the ID3
    tags (or the explicit img/tit/ast overrides); render one PNG per frame
    in a process pool; then mux frames + audio (+ subtitles when present)
    with ffmpeg.

    Returns (output_mp4_name, haslyrics).
    """
    global flag
    p = gr.Progress()
    LRC2SRT.clear()
    # Remove any stale subtitle file from a previous run
    if os.path.exists("out.srt"):
        os.remove("out.srt")
    global haslyrics
    haslyrics = False
    if lyrics:
        p(0.5, "parsing lyrics")
        try:
            # 'x' mode fails if out.srt somehow still exists; both handles
            # are now closed deterministically (the original leaked them).
            with open("out.srt", mode="x", encoding="UTF8") as outf, \
                    open(lyrics, encoding="UTF8") as lyr:
                sf = stripinvisibles(lyr.read())
                if sf[0] == '[':
                    gr.Info("Lyrics of LRC type was detected, converting to SRT")
                    LRC2SRT.convert_to_srt(sf)
                    outf.write('\n'.join(LRC2SRT.SRT))
                    haslyrics = True
                elif sf[0].isdigit():
                    # SRT files start with a numeric cue index
                    outf.write(sf)
                    gr.Info("Lyrics of SRT type was detected")
                    haslyrics = True
                else:
                    gr.Warning("Lyrics file is invalid, skipping")
        except Exception:
            print(traceback.format_exc())
            gr.Warning(
                "Failed to parse lyrics, ensure there are no blank lines in between, you may use Lyrics Editor to ensure compatability")
    os.makedirs(path + f'out/{name}/', exist_ok=True)
    global iii
    iii = 0  # frame counter shared with the progress loop below
    # Load the audio file
    if flag:
        gr.Info("This is the first file since startup, this may take some time")
        flag = 0
    p(0.25, "loading file")
    audio_path = file
    y, sr = librosa.load(audio_path, sr=sr)  # Resample to 11025 Hz
    # Map [-1.0, 1.0] float samples onto unsigned 8-bit (centre = 128)
    y_u8 = (y * 128 + 128).astype('uint8')
    samples_array = y_u8.tolist()
    p(0.5, "extracting metadata")
    # Extract cover image, title, and artist
    cover_file = None
    if img:
        cover_file = Image.open(img)
    cover_img = extract_cover_image(audio_path)
    if img:
        cover_img = cover_file  # an uploaded image overrides embedded art
    if cover_img is None:
        raise gr.Error(
            "Mp3 must have a cover image, upload the image under the 'Metadata' section", duration=None)
    elif cover_img == -1 and not (tit or ast or img):
        # -1 sentinel from extract_cover_image: the MP3 has no ID3 tags at all
        raise gr.Error(
            "Mp3 is missing tags, add the info under the 'Metadata' section", duration=None)
    title, artist = getTitleAndArtist(audio_path)
    if tit and ast:
        title, artist = tit, ast
    if title == '' or artist == '':
        gr.Warning('Missing Title or Artist')
    if img:
        color_thief = ColorThief(img)
        dominant_color = color_thief.get_color(quality=1)
        cover_img = cover_file
    else:
        dominant_color = getColour(cover_img)
    # Frame rendering parameters
    width, height, fps = res[0], res[1], fps
    num_frames = len(samples_array) // (sr // fps)
    # Prepare parameters for each frame (picklable tuples for the pool)
    params = [(n, samples_array, cover_img, title, artist, dominant_color,
               width, height, fps, name, oscres, sr) for n in range(num_frames)]
    print('-'*50)
    # Bug fix: the conditional must be parenthesized — without the parens,
    # precedence made this print either 'Info:External' or just 'ID3'.
    print('Info:' + ("External" if img else "ID3"))
    print("Title: " + title)
    print("Artist: " + artist)
    print(f'Resolution: {str(width)}x{str(height)}')
    print("Background Colour: " + str(dominant_color))
    print('Framerate: ' + str(fps))
    print('Frame Count: ' + str(num_frames))
    print('Segments per frame: ' + str(oscres))
    print('-'*50)
    try:
        # Leave one core free so the UI stays responsive
        with Pool(cpu_count()-1) as pool:
            num_frames = len(samples_array) // (sr // fps)
            # Use imap to get progress updates as each frame completes
            for _ in pool.imap_unordered(render_frame, params):
                iii += 1  # Increment frame count for progress
                p((iii, num_frames), desc="Rendering Frames")
    except Exception:
        raise gr.Error("Something went wrong whilst rendering")
    p = gr.Progress()
    p(0, desc="Compiling video")
    print('-'*50)
    print('FFMPEG')
    if haslyrics:
        ffmpeg_cmd = [
            "ffmpeg",
            '-framerate', str(fps),
            '-i', path + f'out/{name}/%d.png',  # Input PNG images
            '-i', file,                          # Input MP3 audio
            '-i', path + 'out.srt',              # Input SRT subtitles
            '-c:v', 'libx264',
            '-r', str(fps),
            '-pix_fmt', 'yuv420p',
            '-c:a', 'aac',
            '-c:s', 'mov_text',                  # Use mov_text codec for subtitles
            '-y',
            path + f'{name}.mp4'                 # Output MP4 filename
        ]
    else:
        ffmpeg_cmd = [
            "ffmpeg",
            '-framerate', str(fps),
            '-i', path + f'out/{name}/%d.png',  # Input PNG images
            '-i', f'{file}',                     # Input MP3 audio
            '-c:v', 'libx264',
            '-r', str(fps),
            '-pix_fmt', 'yuv420p',
            '-c:a', 'aac',
            '-y',
            path + f'{name}.mp4'                 # Output MP4 filename
        ]
    process = subprocess.Popen(ffmpeg_cmd, stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE, universal_newlines=True)
    # ffmpeg reports progress on stderr; parse the running "frame=N" counter
    frame_re = re.compile(r"frame=\s*(\d+)")
    p = gr.Progress()
    while True:
        output = process.stderr.readline()
        if output == '' and process.poll() is not None:
            break
        if output:
            # Check if the output line contains frame information
            match = frame_re.search(output)
            if match:
                p((int(match.group(1)), num_frames), desc="Compiling Video")
    # Wait for the process to complete
    process.wait()
    print('-'*50)
    return f"{name}.mp4", haslyrics
def gradio_interface(audio_file, lyrics, output_name, fps=30, vidwidth=1280, vidheight=720, oscres=512, img=None, tit=None, ast=None):
    """Validate the upload, run main(), then clean up the frame directory.

    Returns (video_path, srt_path_or_None, haslyrics).
    """
    # Guard clauses: reject missing or non-MP3 uploads up front
    if audio_file is None:
        raise gr.Error("Please Provide an Audio File")
    if not str(audio_file).endswith(".mp3"):
        raise gr.Error("Only MP3 Files are supported at this time")
    dims = f"{vidwidth}x{vidheight}".split('x')
    video_file, haslyrics = main(
        audio_file, output_name, fps=fps, res=(int(dims[0]), int(dims[1])),
        oscres=oscres, lyrics=lyrics, img=img, tit=tit, ast=ast)
    # Remove the per-frame PNG directory now that the video is muxed
    shutil.rmtree("out")
    return video_file, ("out.srt" if haslyrics else None), haslyrics
def update_srt_output_visibility(haslyrics):
    """Return a Gradio component update toggling visibility on haslyrics."""
    # NOTE(review): not wired to any event in the visible code — confirm usage.
    return gr.update(visible=haslyrics)
# --- Gradio UI definition -------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown(
        'Upload an MP3 file and configure parameters to create a visualization video.')
    gr.Markdown(
        'Optionally upload a word or line synced lyric file in the advanced section.')
    with gr.Row():
        # Inputs on the left
        with gr.Column():
            with gr.Accordion(label="Audio Settings", open=True):
                gr.Markdown('## Load your mp3 file here')
                audio_file = gr.File(
                    label="Upload your MP3 file", file_count='single', file_types=['mp3'])
            with gr.Accordion(label="Mp3 Metadata", open=False):
                gr.Markdown(
                    '## Add Metadata here if your mp3 does not have one')
                cover_img = gr.Image(label='Cover Art', type="filepath")
                title_input = gr.Textbox(label='Title')
                artist_input = gr.Textbox(label='Artists')
            with gr.Accordion(label="Video Output Settings", open=False):
                gr.Markdown('## Configure Video Output Here')
                output_name = gr.Textbox(
                    label="Output Video Name", value='Output')
                fps_slider = gr.Slider(
                    label="Frames per Second", minimum=20, maximum=60, step=1, value=30)
                vidwidth_slider = gr.Slider(
                    label="Output Video Width", minimum=100, maximum=2000, value=1280, step=2)
                vidheight_slider = gr.Slider(
                    label="Output Video Height", minimum=100, maximum=2000, value=720, step=2)
            with gr.Accordion(label="Advanced Options", open=False):
                oscres_slider = gr.Slider(
                    label="Number of Visualization Segments", minimum=256, maximum=2048, step=2, value=1024)
                gr.Markdown(
                    'If uploading LRC, ensure a blank timed line at the end to avoid conversion errors')
                lyrics_file = gr.File(label="(Optional) Upload Lyrics as LRC or SRT",
                                      file_count='single', file_types=['lrc', 'srt'])
            # Add a submit button
            submit_btn = gr.Button("Generate Video")
        # Outputs on the right
        with gr.Column():
            output_video = gr.Video(label="Output")
            with gr.Accordion(label="SRT File (Only used if lyrics is provided)", open=False):
                srt_output = gr.File(label="SRT Output")
    # Bind the button to the function
    # NOTE(review): gradio_interface returns 3 values (video, srt, haslyrics)
    # but only 2 outputs are bound here — confirm Gradio tolerates the extra
    # return value or bind a third output component.
    submit_btn.click(
        fn=gradio_interface,
        inputs=[audio_file, lyrics_file, output_name, fps_slider, vidwidth_slider,
                vidheight_slider, oscres_slider, cover_img, title_input, artist_input],
        outputs=[output_video, srt_output]
    )
# Launch the Gradio interface only when run as a script (not on import).
if __name__ == '__main__':
    demo.launch()