Spaces:

callum-canavan
/

Multi-View-Illusion-Diffusion

Paused

App Files Files Community

Multi-View-Illusion-Diffusion / visual_anagrams /animate.py

callum-canavan

Remove comment

09b83c7 over 1 year ago

raw

history blame contribute delete

5.71 kB

	import cv2
	from tqdm import tqdm
	import numpy as np
	from PIL import Image, ImageDraw, ImageFont, ImageChops
	import imageio
	from pygifsicle import optimize

	import torchvision.transforms.functional as TF

	from visual_anagrams.views import get_views
	from visual_anagrams.utils import get_courier_font_path


	def draw_text(image, text, fill=(0,0,0), frame_size=384, im_size=256):
	image = image.copy()

	# Font info
	font_path = get_courier_font_path()
	font_size = 16

	# Make PIL objects
	draw = ImageDraw.Draw(image)
	font = ImageFont.truetype(font_path, font_size)

	# Center text horizontally, and vertically between
	# illusion bottom and frame bottom
	text_position = (0, 0)
	bbox = draw.textbbox(text_position, text, font=font, align='center')
	text_width = bbox[2] - bbox[0]
	text_height = bbox[3] - bbox[1]
	text_left = (frame_size - text_width) // 2
	text_top = int(3/4 * frame_size + 1/4 * im_size - 1/2 * text_height)
	text_position = (text_left, text_top)

	# Draw text on image
	draw.text(text_position, text, font=font, fill=fill, align='center')
	return image


	def easeInOutQuint(x):
	# From Matthew Tancik:
	# https://github.com/tancik/Illusion-Diffusion/blob/main/IllusionDiffusion.ipynb
	if x < 0.5:
	return 4 * x**3
	else:
	return 1 - (-2 * x + 2)**3 / 2


	def animate_two_view(
	im_path,
	view,
	prompt_1,
	prompt_2,
	save_video_path='tmp.mp4',
	hold_duration=60,
	text_fade_duration=10,
	transition_duration=80,
	im_size=256,
	frame_size=384,
	):
	'''
	TODO: Assuming two views, first one is identity
	'''
	im = Image.open(im_path)

	# Make list of frames
	frames = []

	# Make frames for two views
	frame_1 = view.make_frame(im, 0.0)
	frame_2 = view.make_frame(im, 1.0)

	# Display frame 1 with text
	frame_1_text = draw_text(frame_1,
	prompt_1,
	frame_size=frame_size,
	im_size=im_size)
	frames += [frame_1_text] * (hold_duration // 2)

	# Fade out text 1
	for t in np.linspace(0,1,text_fade_duration):
	c = int(t * 255)
	fill = (c,c,c)
	frame = draw_text(frame_1,
	prompt_1,
	fill=fill,
	frame_size=frame_size,
	im_size=im_size)
	frames.append(frame)

	# Transition view 1 -> view 2
	for t in tqdm(np.linspace(0,1,transition_duration)):
	t_ease = easeInOutQuint(t)
	frames.append(view.make_frame(im, t_ease))

	# Fade in text 2
	for t in np.linspace(1,0,text_fade_duration):
	c = int(t * 255)
	fill = (c,c,c)
	frame = draw_text(frame_2,
	prompt_2,
	fill=fill,
	frame_size=frame_size,
	im_size=im_size)
	frames.append(frame)

	# Display frame 2 with text
	frame_2_text = draw_text(frame_2,
	prompt_2,
	frame_size=frame_size,
	im_size=im_size)
	frames += [frame_2_text] * (hold_duration // 2)

	# "Boomerang" the clip, so we get back to view 1
	frames = frames + frames[::-1]

	# Move last bit of clip to front
	frames = frames[-hold_duration//2:] + frames[:-hold_duration//2]
	images = frames


	# Save the frames as a GIF
	image_array = [imageio.core.asarray(frame) for frame in frames]

	# Save as video
	print('Making video...')
	imageio.mimsave(save_video_path, image_array, fps=30)



	if __name__ == '__main__':
	import argparse
	import pickle
	from pathlib import Path

	parser = argparse.ArgumentParser()
	parser.add_argument("--im_path", required=True, type=str, help='Path to the illusion to animate')
	parser.add_argument("--save_video_path", default=None, type=str,
	help='Path to save video to. If None, defaults to `im_path`, with extension `.mp4`')
	parser.add_argument("--metadata_path", default=None, type=str, help='Path to metadata. If specified, overrides `view` and `prompt` args')
	parser.add_argument("--view", default=None, type=str, help='Name of view to use')
	parser.add_argument("--prompt_1", default='', nargs='+', type=str,
	help='Prompt for first view. Passing multiple will join them with newlines.')
	parser.add_argument("--prompt_2", default='', nargs='+', type=str,
	help='Prompt for first view. Passing multiple will join them with newlines.')
	args = parser.parse_args()


	# Load image
	im_path = Path(args.im_path)

	# Get save dir
	if args.save_video_path is None:
	save_video_path = im_path.with_suffix('.mp4')

	if args.metadata_path is None:
	# Join prompts with newlines
	prompt_1 = '\n'.join(args.prompt_1)
	prompt_2 = '\n'.join(args.prompt_2)

	# Get paths and views
	view = get_views([args.view])[0]
	else:
	with open(args.metadata_path, 'rb') as f:
	metadata = pickle.load(f)
	view = metadata['views'][1]
	m_args = metadata['args']
	prompt_1 = f'{m_args.style} {m_args.prompts[0]}'.strip()
	prompt_2 = f'{m_args.style} {m_args.prompts[1]}'.strip()


	# Animate
	animate_two_view(
	im_path,
	view,
	prompt_1,
	prompt_2,
	save_video_path=save_video_path,
	hold_duration=120,
	text_fade_duration=10,
	transition_duration=45,
	im_size=256,
	frame_size=384,
	)