UnlimitedMusicGen

Sleeping

App Files Files Community

UnlimitedMusicGen / audiocraft /utils /extend.py

Surn

Add Text and description to video

5d66b58 over 1 year ago

raw

history blame

7.76 kB

	import torch
	import math
	from audiocraft.models import MusicGen
	import numpy as np
	from PIL import Image, ImageDraw, ImageFont, ImageColor
	import string
	import tempfile
	import os
	import textwrap

	def separate_audio_segments(audio, segment_duration=30):
	    """
	    Splits an audio clip into consecutive segments of a fixed duration.

	    Parameters:
	    - audio: tuple
	        A (sample_rate, samples) pair. The samples object only needs to
	        support len() and slicing.
	    - segment_duration: int or float
	        Length of each segment in seconds (default 30).

	    Returns:
	    - list: (sample_rate, segment) tuples in input order. The last segment
	        is shorter when the clip length is not a whole multiple of the
	        segment length; an empty clip yields an empty list.

	    Raises:
	    - ZeroDivisionError: if the segment length works out to zero samples.
	    """
	    sr, audio_data = audio[0], audio[1]
	    # Slice bounds must be integers, so drop any fractional sample count
	    # produced by a float duration.
	    segment_samples = int(sr * segment_duration)
	    total_samples = len(audio_data)
	    total_segments = math.ceil(total_samples / segment_samples)
	    segments = []
	    for segment_idx in range(total_segments):
	        # Progress is reported as "current / total" (1-based current).
	        print(f"Audio Input segment {segment_idx + 1} / {total_segments} \r")
	        start_sample = segment_idx * segment_samples
	        end_sample = start_sample + segment_samples
	        segments.append((sr, audio_data[start_sample:end_sample]))
	    return segments

	def generate_music_segments(text, melody, MODEL, duration:int=10, segment_duration:int=30):
	    """
	    Generates one melody-conditioned output per segment of the requested duration.

	    The melody is cut into segment_duration-second pieces with
	    separate_audio_segments. When there are fewer pieces than segments to
	    generate, pieces are reused starting from the first one. Each piece is
	    converted to a tensor on MODEL.device, trimmed to the model's configured
	    segment length, and passed to MODEL.generate_with_chroma with the text.

	    Parameters:
	    - text: str
	        Description passed to the model for every segment.
	    - melody: tuple
	        A (sample_rate, samples) pair; samples go through torch.from_numpy,
	        so a NumPy array is expected.
	    - MODEL:
	        Object exposing device, lm.cfg.dataset.segment_duration and
	        generate_with_chroma (presumably a MusicGen model - confirm with caller).
	    - duration: int
	        Total requested length in seconds; determines how many segments
	        are generated (at least one).
	    - segment_duration: int
	        Length in seconds of each melody segment.

	    Returns:
	    - list: The raw outputs of MODEL.generate_with_chroma, one per segment,
	        appended without trimming.
	    """
	    source_segments = separate_audio_segments(melody, segment_duration)
	    # Always generate at least one segment, even for very short durations.
	    total_segments = max(math.ceil(duration / segment_duration), 1)
	    print(f"total Segments to Generate: {total_segments} for {duration} seconds. Each segment is {segment_duration} seconds")
	    # Pad the melody by reusing its segments from the beginning when it is
	    # shorter than the number of segments to generate.
	    shortfall = total_segments - len(source_segments)
	    if shortfall > 0:
	        for reuse_idx in range(shortfall):
	            source_segments.append(source_segments[reuse_idx])
	        print(f"melody_segments: {len(source_segments)} fixed")
	    # Build the list of melody tensors, one per segment to generate.
	    melodys = []
	    for number, (sr, samples) in enumerate(source_segments[:total_segments], start=1):
	        print(f"segment {number} of {total_segments} \r")
	        on_device = torch.from_numpy(samples).to(MODEL.device).float()
	        verse = on_device.t().unsqueeze(0)
	        print(f"shape:{verse.shape} dim:{verse.dim()}")
	        if verse.dim() == 2:
	            # Add a leading axis so the tensor is always 3-dimensional.
	            verse = verse[None]
	        max_samples = int(sr * MODEL.lm.cfg.dataset.segment_duration)
	        melodys.append(verse[..., :max_samples])
	    # Generate audio for each melody tensor; sr is the rate of the last
	    # segment prepared above.
	    output_segments = []
	    for number, verse in enumerate(melodys, start=1):
	        print(f"Generating New Melody Segment {number}: {text}\r")
	        output = MODEL.generate_with_chroma(
	            descriptions=[text],
	            melody_wavs=verse,
	            melody_sample_rate=sr,
	            progress=True
	        )
	        output_segments.append(output)
	        print(f"output_segments: {len(output_segments)}: shape: {output.shape} dim {output.dim()}")
	    return output_segments

	#def generate_music_segments(text, melody, duration, MODEL, segment_duration=30):
	# sr, melody = melody[0], torch.from_numpy(melody[1]).to(MODEL.device).float().t().unsqueeze(0)

	# # Create a list to store the melody tensors for each segment
	# melodys = []

	# # Calculate the total number of segments
	# total_segments = math.ceil(melody.shape[1] / (sr * segment_duration))

	# # Iterate over the segments
	# for segment_idx in range(total_segments):
	# print(f"segment {segment_idx + 1} / {total_segments + 1} \r")
	# start_frame = segment_idx * sr * segment_duration
	# end_frame = (segment_idx + 1) * sr * segment_duration

	# # Extract the segment from the melody tensor
	# segment = melody[:, start_frame:end_frame]

	# # Append the segment to the melodys list
	# melodys.append(segment)

	# output_segments = []

	# for segment in melodys:
	# output = MODEL.generate_with_chroma(
	# descriptions=[text],
	# melody_wavs=segment,
	# melody_sample_rate=sr,
	# progress=False
	# )

	# # Append the generated output to the list of segments
	# output_segments.append(output[:, :segment_duration])

	# return output_segments

	def save_image(image):
	    """
	    Saves a PIL image to a temporary file and returns the file path.

	    Parameters:
	    - image: PIL.Image
	        The PIL image object to be saved.

	    Returns:
	    - str or None: The file path where the image was saved,
	        or None if there was an error saving the image.
	    """
	    # Reserve a unique .png path. delete=False keeps the file on disk after
	    # the handle is closed so the image can be written to it by name.
	    with tempfile.NamedTemporaryFile(suffix=".png", dir=tempfile.gettempdir(), delete=False) as temp_file:
	        file_path = temp_file.name
	    try:
	        image.save(file_path)
	    except Exception as e:
	        print("Unable to save image:", str(e))
	        # Best-effort removal of the empty placeholder; a failure to delete
	        # it must not mask the original save error.
	        try:
	            os.remove(file_path)
	        except OSError:
	            pass
	        return None
	    # No "finally: return" here: it would override the None returned above.
	    return file_path

	def hex_to_rgba(hex_color):
	    """
	    Converts a color string to an RGBA tuple.

	    Parameters:
	    - hex_color: str
	        Color specification handed to ImageColor.getcolor.

	    Returns:
	    - tuple: The RGBA value, or opaque yellow (255, 255, 0, 255) when
	        ImageColor.getcolor rejects the string with ValueError.
	    """
	    fallback_yellow = (255,255,0,255)
	    try:
	        return ImageColor.getcolor(hex_color, "RGBA")
	    except ValueError:
	        # Unparseable color: use the default instead of failing.
	        return fallback_yellow

	def _draw_centered_text(draw, text, font, center_x, top_y, fill):
	    """Draws (possibly multi-line) text centered on center_x at top_y; returns the measured bottom extent in pixels."""
	    if not text:
	        return 0
	    # Measure with the multi-line API so that wrapped text is sized by its
	    # widest line and full height, not as one long single line.
	    left, _top, right, bottom = draw.multiline_textbbox((0, 0), text, font=font, align="center")
	    x = max(int(center_x - (right - left) / 2), 0)
	    draw.multiline_text((x, top_y), text, fill=fill, font=font, align="center")
	    return int(bottom)

	def add_settings_to_image(title: str = "title", description: str = "", width: int = 768, height: int = 512, background_path: str = "", font: str = "arial.ttf", font_color: str = "#ffffff"):
	    """
	    Renders a title and description onto a background and saves the result.

	    The text is drawn on a transparent width x height overlay, which is then
	    pasted centered onto the background image (or onto a plain white
	    background of the same size when no background is given).

	    Parameters:
	    - title: str
	        Heading text, drawn at 26pt with 10 pixels of padding from the top.
	    - description: str
	        Body text, drawn at 16pt 20 pixels below the title.
	    - width, height: int
	        Size in pixels of the text overlay.
	    - background_path: str
	        Path of the background image; "" selects a white background.
	    - font: str
	        Font file passed to ImageFont.truetype.
	    - font_color: str
	        Text color, converted with hex_to_rgba.

	    Returns:
	    - The value returned by save_image for the composed image.
	    """
	    # Transparent overlay that receives the text.
	    image = Image.new("RGBA", (width, height), (255, 255, 255, 0))
	    if background_path == "":
	        background = Image.new("RGBA", (width, height), (255, 255, 255, 255))
	    else:
	        # convert() returns a new image, so the source file can be closed.
	        with Image.open(background_path) as source:
	            background = source.convert("RGBA")
	    # Convert font color to RGBA tuple
	    font_color = hex_to_rgba(font_color)
	    # textwrap.wrap rejects a width of 0, so keep at least one column.
	    wrap_columns = max(width // 12, 1)
	    center_x = width // 2
	    draw = ImageDraw.Draw(image)
	    # Title: wrapped, centered, 10 pixels padding from the top.
	    title_font = ImageFont.truetype(font, 26)
	    title_text = '\n'.join(textwrap.wrap(title, wrap_columns))
	    title_y = 10
	    title_text_height = _draw_centered_text(draw, title_text, title_font, center_x, title_y, font_color)
	    # Description: wrapped, centered, 20 pixels below the title.
	    description_font = ImageFont.truetype(font, 16)
	    description_text = '\n'.join(textwrap.wrap(description, wrap_columns))
	    description_y = title_y + title_text_height + 20
	    _draw_centered_text(draw, description_text, description_font, center_x, description_y, font_color)
	    # Center the overlay on the background, using its alpha as the mask.
	    bg_w, bg_h = background.size
	    offset = ((bg_w - width) // 2, (bg_h - height) // 2)
	    background.paste(image, offset, mask=image)
	    # Save the image and return the file path
	    return save_image(background)


	def sanitize_file_name(filename):
	    """
	    Removes every character that is not allowed in a file name.

	    Parameters:
	    - filename: str
	        The proposed file name.

	    Returns:
	    - str: The input with only ASCII letters, digits, spaces and the
	        characters - _ . ( ) kept, in their original order.
	    """
	    allowed = "-_.() " + string.ascii_letters + string.digits
	    kept = []
	    for char in filename:
	        if char in allowed:
	            kept.append(char)
	    return ''.join(kept)