File size: 5,708 Bytes
b3ee019 381e596 609badf 381e596 609badf 381e596 609badf 381e596 609badf 381e596 609badf 381e596 609badf 381e596 609badf cca580a 381e596 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 |
import cv2
from tqdm import tqdm
import numpy as np
from PIL import Image, ImageDraw, ImageFont, ImageChops
import imageio
from pygifsicle import optimize
import torchvision.transforms.functional as TF
from visual_anagrams.views import get_views
from visual_anagrams.utils import get_courier_font_path
def draw_text(image, text, fill=(0,0,0), frame_size=384, im_size=256):
    '''
    Return a copy of `image` with `text` drawn on it in a Courier font.

    The text is centered horizontally in the frame, and placed vertically
    at the midpoint between the bottom of the (centered) illusion and the
    bottom of the frame.

    image : PIL.Image to annotate (not modified in place)
    text : string to draw (may contain newlines)
    fill : RGB tuple for the text color
    frame_size : side length of the full frame in pixels
    im_size : side length of the illusion image in pixels
    '''
    canvas = image.copy()

    # PIL drawing objects
    drawer = ImageDraw.Draw(canvas)
    font = ImageFont.truetype(get_courier_font_path(), 16)

    # Measure the rendered text so it can be centered
    x0, y0, x1, y1 = drawer.textbbox((0, 0), text, font=font, align='center')
    text_w = x1 - x0
    text_h = y1 - y0

    # Horizontal center; vertical midpoint of the strip below the illusion.
    # Illusion bottom sits at (frame_size + im_size) / 2, frame bottom at
    # frame_size, so the midpoint is 3/4 * frame_size + 1/4 * im_size.
    left = (frame_size - text_w) // 2
    top = int(3 / 4 * frame_size + 1 / 4 * im_size - 1 / 2 * text_h)

    drawer.text((left, top), text, font=font, fill=fill, align='center')
    return canvas
def easeInOutQuint(x):
    '''
    Quintic ease-in/ease-out: maps x in [0, 1] to [0, 1], slow at both
    ends and fast in the middle. Both branches meet at (0.5, 0.5).

    Adapted from Matthew Tancik:
    https://github.com/tancik/Illusion-Diffusion/blob/main/IllusionDiffusion.ipynb

    Fix: the previous body used cubic coefficients (4 * x**3 and a cubed
    second branch), which is easeInOutCubic. The standard quintic curve
    uses the fifth power: 16 * x**5 below the midpoint.
    '''
    if x < 0.5:
        return 16 * x**5
    else:
        return 1 - (-2 * x + 2)**5 / 2
def animate_two_view(
        im_path,
        view,
        prompt_1,
        prompt_2,
        save_video_path='tmp.mp4',
        hold_duration=60,
        text_fade_duration=10,
        transition_duration=80,
        im_size=256,
        frame_size=384,
    ):
    '''
    Render a video animating an illusion between two views.

    TODO: Assumes exactly two views, the first of which is the identity.

    im_path : path to the illusion image to animate
    view : view object; `view.make_frame(im, t)` renders the image at
        interpolation parameter t in [0, 1] (t=0 is view 1, t=1 is view 2)
    prompt_1, prompt_2 : captions drawn under each view
    save_video_path : output path passed to `imageio.mimsave`
    hold_duration : total frames holding the two captioned endpoints
    text_fade_duration : frames for each caption fade in/out
    transition_duration : frames for the eased view 1 -> view 2 transition
    im_size, frame_size : pixel sizes forwarded to `draw_text`

    The clip is "boomeranged" (played forward then backward) so it loops
    seamlessly, then rotated so playback starts mid-hold on view 1.
    '''
    im = Image.open(im_path)

    # Use one consistent half-hold length everywhere. (Previously the
    # final rotation sliced with -hold_duration//2, which for odd
    # hold_duration is one frame larger than the hold_duration//2 used
    # for the holds themselves.)
    half_hold = hold_duration // 2

    frames = []

    # Endpoint frames for the two views
    frame_1 = view.make_frame(im, 0.0)
    frame_2 = view.make_frame(im, 1.0)

    # Hold view 1 with its caption
    frame_1_text = draw_text(frame_1,
                             prompt_1,
                             frame_size=frame_size,
                             im_size=im_size)
    frames += [frame_1_text] * half_hold

    # Fade caption 1 out (black -> white)
    for t in np.linspace(0, 1, text_fade_duration):
        c = int(t * 255)
        frames.append(draw_text(frame_1,
                                prompt_1,
                                fill=(c, c, c),
                                frame_size=frame_size,
                                im_size=im_size))

    # Eased transition, view 1 -> view 2
    for t in tqdm(np.linspace(0, 1, transition_duration)):
        frames.append(view.make_frame(im, easeInOutQuint(t)))

    # Fade caption 2 in (white -> black)
    for t in np.linspace(1, 0, text_fade_duration):
        c = int(t * 255)
        frames.append(draw_text(frame_2,
                                prompt_2,
                                fill=(c, c, c),
                                frame_size=frame_size,
                                im_size=im_size))

    # Hold view 2 with its caption
    frame_2_text = draw_text(frame_2,
                             prompt_2,
                             frame_size=frame_size,
                             im_size=im_size)
    frames += [frame_2_text] * half_hold

    # "Boomerang" the clip, so we get back to view 1
    frames = frames + frames[::-1]

    # Rotate so the clip starts in the middle of the view-1 hold
    frames = frames[-half_hold:] + frames[:-half_hold]

    # Save as video (removed a dead `images = frames` alias here)
    print('Making video...')
    image_array = [imageio.core.asarray(frame) for frame in frames]
    imageio.mimsave(save_video_path, image_array, fps=30)
if __name__ == '__main__':
    import argparse
    import pickle
    from pathlib import Path

    parser = argparse.ArgumentParser()
    parser.add_argument("--im_path", required=True, type=str, help='Path to the illusion to animate')
    parser.add_argument("--save_video_path", default=None, type=str,
                        help='Path to save video to. If None, defaults to `im_path`, with extension `.mp4`')
    parser.add_argument("--metadata_path", default=None, type=str, help='Path to metadata. If specified, overrides `view` and `prompt` args')
    parser.add_argument("--view", default=None, type=str, help='Name of view to use')
    parser.add_argument("--prompt_1", default='', nargs='+', type=str,
                        help='Prompt for first view. Passing multiple will join them with newlines.')
    parser.add_argument("--prompt_2", default='', nargs='+', type=str,
                        help='Prompt for second view. Passing multiple will join them with newlines.')
    args = parser.parse_args()

    # Load image path
    im_path = Path(args.im_path)

    # Get save path. Fix: previously `save_video_path` was only assigned
    # in the None branch, so passing --save_video_path raised a NameError.
    if args.save_video_path is None:
        save_video_path = im_path.with_suffix('.mp4')
    else:
        save_video_path = args.save_video_path

    if args.metadata_path is None:
        # Join prompts with newlines
        prompt_1 = '\n'.join(args.prompt_1)
        prompt_2 = '\n'.join(args.prompt_2)

        # Get paths and views
        view = get_views([args.view])[0]
    else:
        # NOTE(review): unpickling is arbitrary code execution — only load
        # metadata files produced by this project, never untrusted input.
        with open(args.metadata_path, 'rb') as f:
            metadata = pickle.load(f)
        view = metadata['views'][1]
        m_args = metadata['args']
        prompt_1 = f'{m_args.style} {m_args.prompts[0]}'.strip()
        prompt_2 = f'{m_args.style} {m_args.prompts[1]}'.strip()

    # Animate
    animate_two_view(
        im_path,
        view,
        prompt_1,
        prompt_2,
        save_video_path=save_video_path,
        hold_duration=120,
        text_fade_duration=10,
        transition_duration=45,
        im_size=256,
        frame_size=384,
    )