import cv2
from tqdm import tqdm
import numpy as np
from PIL import Image, ImageDraw, ImageFont, ImageChops
import imageio
from pygifsicle import optimize

import torchvision.transforms.functional as TF

from visual_anagrams.views import get_views
from visual_anagrams.utils import get_courier_font_path


def draw_text(image, text, fill=(0,0,0), frame_size=384, im_size=256):
    image = image.copy()

    # Font info
    font_path = get_courier_font_path()
    font_size = 16

    # Make PIL objects
    draw = ImageDraw.Draw(image)
    font = ImageFont.truetype(font_path, font_size)
    
    # Center text horizontally, and vertically between
    # illusion bottom and frame bottom
    text_position = (0, 0)
    bbox = draw.textbbox(text_position, text, font=font, align='center')
    text_width = bbox[2] - bbox[0]
    text_height = bbox[3] - bbox[1]
    text_left = (frame_size - text_width) // 2
    text_top = int(3/4 * frame_size + 1/4 * im_size - 1/2 * text_height)
    text_position = (text_left, text_top)

    # Draw text on image
    draw.text(text_position, text, font=font, fill=fill, align='center')
    return image
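
# Example usage of draw_text (illustrative; the blank frame and prompt below
# are placeholders, not values used elsewhere in this file):
#   frame = Image.new('RGB', (384, 384), 'white')
#   labeled = draw_text(frame, 'an oil painting of a dog',
#                       frame_size=384, im_size=256)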


def easeInOutQuint(x):
    # From Matthew Tancik:
    # https://github.com/tancik/Illusion-Diffusion/blob/main/IllusionDiffusion.ipynb
    # Note: despite the name, this is the cubic ease-in-out curve;
    # a true quintic would use fifth powers.
    if x < 0.5:
        return 4 * x**3
    else:
        return 1 - (-2 * x + 2)**3 / 2
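

# Sketch for contrast (not called anywhere in this file): the standard
# quintic ease-in-out curve. The underscore-prefixed name is made up for
# this example.
def _ease_in_out_quint_strict(x):
    # 16x^5 on the first half, mirrored fifth-power curve on the second half
    if x < 0.5:
        return 16 * x**5
    return 1 - (-2 * x + 2)**5 / 2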


def animate_two_view(
        im_path,
        view,
        prompt_1,
        prompt_2,
        save_video_path='tmp.mp4',
        hold_duration=60,
        text_fade_duration=10,
        transition_duration=80,
        im_size=256,
        frame_size=384,
):
    '''
    Animate an illusion by transitioning between its two views.

    Assumes exactly two views, the first of which is the identity view.
    TODO: generalize to more than two views.
    '''
    im = Image.open(im_path)

    # Make list of frames
    frames = []

    # Make frames for two views 
    frame_1 = view.make_frame(im, 0.0)
    frame_2 = view.make_frame(im, 1.0)

    # Display frame 1 with text
    frame_1_text = draw_text(frame_1, 
                             prompt_1, 
                             frame_size=frame_size, 
                             im_size=im_size)
    frames += [frame_1_text] * (hold_duration // 2)

    # Fade out text 1
    for t in np.linspace(0,1,text_fade_duration):
        c = int(t * 255)
        fill = (c,c,c)
        frame = draw_text(frame_1, 
                          prompt_1, 
                          fill=fill,
                          frame_size=frame_size, 
                          im_size=im_size)
        frames.append(frame)

    # Transition view 1 -> view 2
    for t in tqdm(np.linspace(0,1,transition_duration)):
        t_ease = easeInOutQuint(t)
        frames.append(view.make_frame(im, t_ease))

    # Fade in text 2
    for t in np.linspace(1,0,text_fade_duration):
        c = int(t * 255)
        fill = (c,c,c)
        frame = draw_text(frame_2,
                          prompt_2,
                          fill=fill,
                          frame_size=frame_size, 
                          im_size=im_size)
        frames.append(frame)

    # Display frame 2 with text
    frame_2_text = draw_text(frame_2, 
                             prompt_2, 
                             frame_size=frame_size, 
                             im_size=im_size)
    frames += [frame_2_text] * (hold_duration // 2)

    # "Boomerang" the clip, so we get back to view 1
    frames = frames + frames[::-1]

    # Move last bit of clip to front
    frames = frames[-hold_duration//2:] + frames[:-hold_duration//2]
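    # Net effect (e.g. with the default hold_duration=60): the loop opens on
    # a full 60-frame hold of view 1 (30 frames from here plus 30 moved from
    # the end), holds view 2 for 60 frames in the middle, and then returns
    # to view 1, so the clip loops seamlessly.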

    # Convert PIL frames to numpy arrays for imageio
    image_array = [imageio.core.asarray(frame) for frame in frames]

    # Save as video
    print('Making video...')
    imageio.mimsave(save_video_path, image_array, fps=30)
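
    # Optional sketch (not part of the original flow): `optimize` from
    # pygifsicle is imported above but never used. If a GIF is also wanted,
    # something like the following could write and shrink one next to the
    # video (the .gif path and per-frame `duration` are assumptions, and the
    # gifsicle binary must be installed for `optimize` to work):
    # gif_path = str(save_video_path).rsplit('.', 1)[0] + '.gif'
    # imageio.mimsave(gif_path, image_array, duration=1/30, loop=0)
    # optimize(gif_path)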



if __name__ == '__main__':
    import argparse
    import pickle
    from pathlib import Path

    parser = argparse.ArgumentParser()
    parser.add_argument("--im_path", required=True, type=str, help='Path to the illusion to animate')
    parser.add_argument("--save_video_path", default=None, type=str, 
        help='Path to save video to. If None, defaults to `im_path`, with extension `.mp4`')
    parser.add_argument("--metadata_path", default=None, type=str, help='Path to metadata. If specified, overrides `view` and `prompt` args')
    parser.add_argument("--view", default=None, type=str, help='Name of view to use')
    parser.add_argument("--prompt_1", default='', nargs='+', type=str,
        help='Prompt for first view. Passing multiple will join them with newlines.')
    parser.add_argument("--prompt_2", default='', nargs='+', type=str,
        help='Prompt for first view. Passing multiple will join them with newlines.')
    args = parser.parse_args()


    # Load image
    im_path = Path(args.im_path)

    # Get save path, defaulting to the image path with a .mp4 extension
    if args.save_video_path is None:
        save_video_path = im_path.with_suffix('.mp4')
    else:
        save_video_path = args.save_video_path

    if args.metadata_path is None:
        # Join prompts with newlines
        prompt_1 = '\n'.join(args.prompt_1)
        prompt_2 = '\n'.join(args.prompt_2)

        # Get paths and views
        view = get_views([args.view])[0]
    else:
        with open(args.metadata_path, 'rb') as f:
            metadata = pickle.load(f)
        view = metadata['views'][1]
        m_args = metadata['args']
        prompt_1 = f'{m_args.style} {m_args.prompts[0]}'.strip()
        prompt_2 = f'{m_args.style} {m_args.prompts[1]}'.strip()


    # Animate
    animate_two_view(
            im_path,
            view,
            prompt_1,
            prompt_2,
            save_video_path=save_video_path,
            hold_duration=120,
            text_fade_duration=10,
            transition_duration=45,
            im_size=256,
            frame_size=384,
        )
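
# Example invocation (illustrative; the script name, file paths, and view
# name below are placeholders):
#   python animate.py --im_path results/illusion.png --view rotate_cw \
#       --prompt_1 "an oil painting of a dog" \
#       --prompt_2 "an oil painting of a cat"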