Commit 609badf by callum-canavan
Parent(s): 1ad8665

Fix pipeline
Files changed:
- .gitignore +2 -1
- bapp.py +2 -1
- requirements.txt +1 -0
- test_video.py +1 -1
- visual_anagrams/animate.py +23 -20
- visual_anagrams/samplers.py +5 -4
.gitignore
CHANGED
@@ -1,4 +1,5 @@
 env/
 __pycache__/
 *.png
-*.mp4
+*.mp4
+*.gif
bapp.py
CHANGED
@@ -75,12 +75,13 @@ def generate_content(
 choices = list(VIEW_MAP_NAMES.keys())
 gradio_app = gr.Interface(
     fn=generate_content,
+    title="Multi-View Illusion Diffusion",
     inputs=[
         gr.Textbox(label="Style", placeholder="an oil painting of"),
         gr.Textbox(label="Prompt for original view", placeholder="a dress"),
         gr.Textbox(label="Prompt for transformed view", placeholder="an old man"),
         gr.Dropdown(label="View transformation", choices=choices, value=choices[0]),
-        gr.Number(label="Number of diffusion steps", value=
+        gr.Number(label="Number of diffusion steps", value=50, step=1, minimum=1, maximum=300),
         gr.Number(label="Random seed", value=0, step=1, minimum=0, maximum=100000)
     ],
     outputs=[gr.Video(label="Illusion"), gr.Image(label="Original"), gr.Image(label="Transformed")],
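Note: even with step=1, gr.Number hands the callback its value as a Python float (e.g. 50.0) unless a precision is set, which is why the sampling code further down casts with int(). A minimal sketch of that behavior, using a stand-in echo callback rather than the real generate_content:

import gradio as gr

def echo(steps, seed):
    # gr.Number delivers floats (e.g. 50.0, 0.0); cast before using as counts
    return f"steps={int(steps)}, seed={int(seed)}"

demo = gr.Interface(
    fn=echo,
    inputs=[
        gr.Number(label="Number of diffusion steps", value=50, step=1, minimum=1, maximum=300),
        gr.Number(label="Random seed", value=0, step=1, minimum=0, maximum=100000),
    ],
    outputs=gr.Textbox(label="Echo"),
)

if __name__ == "__main__":
    demo.launch()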
requirements.txt
CHANGED
@@ -7,6 +7,7 @@ imageio
 imageio[ffmpeg]
 imageio[pyav]
 opencv-python
+pygifsicle
 safetensors
 sentencepiece
 transformers
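pygifsicle is a thin Python wrapper around the gifsicle command-line tool, so the system binary generally needs to be installed separately (e.g. apt-get install gifsicle) for the optimize import added in animate.py below to be usable. A minimal usage sketch; the tmp.gif path is illustrative:

from pygifsicle import optimize

# Losslessly optimize a GIF in place; requires the gifsicle binary on PATH
optimize("tmp.gif")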
test_video.py
CHANGED
@@ -7,5 +7,5 @@ if __name__ == "__main__":
         get_views(["identity", "flip"])[1],
         "a painting of vases",
         "a painting of a sloth",
-        save_video_path="
+        save_video_path="tmp.mp4",
     )
visual_anagrams/animate.py
CHANGED
@@ -1,8 +1,9 @@
 import cv2
 from tqdm import tqdm
 import numpy as np
-from PIL import Image, ImageDraw, ImageFont
+from PIL import Image, ImageDraw, ImageFont, ImageChops
 import imageio
+from pygifsicle import optimize
 
 import torchvision.transforms.functional as TF
 
@@ -14,11 +15,12 @@ def draw_text(image, text, fill=(0,0,0), frame_size=384, im_size=256):
     image = image.copy()
 
     # Font info
+    font_path = get_courier_font_path()
     font_size = 16
 
     # Make PIL objects
     draw = ImageDraw.Draw(image)
-    font = ImageFont.
+    font = ImageFont.truetype(font_path, font_size)
 
     # Center text horizontally, and vertically between
     # illusion bottom and frame bottom
@@ -50,9 +52,9 @@ def animate_two_view(
         prompt_1,
         prompt_2,
         save_video_path='tmp.mp4',
-        hold_duration=
+        hold_duration=60,
         text_fade_duration=10,
-        transition_duration=
+        transition_duration=80,
         im_size=256,
         frame_size=384,
     ):
@@ -114,22 +116,23 @@ def animate_two_view(
 
     # Move last bit of clip to front
     frames = frames[-hold_duration//2:] + frames[:-hold_duration//2]
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    images = frames
+
+    processed_frames = [images[0]]
+
+    for i in range(1, len(images)):
+        # Calculate the difference between current and previous frame
+        diff = ImageChops.difference(images[i], images[i - 1])
+        # Create a mask to isolate changes
+        mask = diff.convert("L").point(lambda x: 0 if x < 5 else 255, "1")
+        # Apply the mask to the current frame
+        new_frame = ImageChops.composite(images[i], processed_frames[-1], mask)
+        processed_frames.append(new_frame)
+
+    # Save the frames as a GIF
+    imageio.mimsave(save_video_path,
+                    [np.array(frame) for frame in processed_frames],
+                    fps=30)
 
 
 
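The new frame loop in animate.py copies pixels that barely change between consecutive frames forward from the previous output frame, so static regions stay byte-identical and the resulting GIF compresses better. A standalone sketch of the same technique, assuming a list of equally sized RGB PIL images; the helper name dedupe_frames and its threshold default are illustrative, not part of the repo:

import numpy as np
import imageio
from PIL import Image, ImageChops

def dedupe_frames(frames, threshold=5):
    """Copy nearly unchanged pixels forward from the previous processed frame."""
    processed = [frames[0]]
    for current, previous in zip(frames[1:], frames[:-1]):
        # Grayscale magnitude of the change between consecutive source frames
        diff = ImageChops.difference(current, previous)
        # 1-bit mask: on where the pixel changed enough to keep the new value
        mask = diff.convert("L").point(lambda x: 0 if x < threshold else 255, "1")
        # Changed pixels come from the new frame, the rest from the previous output
        processed.append(ImageChops.composite(current, processed[-1], mask))
    return processed

# Hypothetical usage:
# frames = [Image.open(p).convert("RGB") for p in ("f0.png", "f1.png", "f2.png")]
# imageio.mimsave("tmp.gif", [np.array(f) for f in dedupe_frames(frames)], fps=30)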
visual_anagrams/samplers.py
CHANGED
@@ -30,7 +30,7 @@ def sample_stage_1(model,
     prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds])
 
     # Setup timesteps
-    model.scheduler.set_timesteps(num_inference_steps, device=device)
+    model.scheduler.set_timesteps(int(num_inference_steps), device=device)
     timesteps = model.scheduler.timesteps
 
     # Make intermediate_images
@@ -45,7 +45,7 @@ def sample_stage_1(model,
     )
     # ic(noisy_images.shape)
 
-    for i, t in tqdm(
+    for i, t in enumerate(tqdm(timesteps)):
         # Apply views to noisy_image
         viewed_noisy_images = []
         for view_fn in views:
@@ -109,6 +109,7 @@ def sample_stage_1(model,
         # ic(noise_pred.shape)
 
         # ic(t.shape)
+        # ic(t.dtype)
         # compute the previous noisy sample x_t -> x_t-1
         noisy_images = model.scheduler.step(
             noise_pred.to('cuda'), t, noisy_images.to('cuda'), generator=generator, return_dict=False
@@ -148,7 +149,7 @@ def sample_stage_2(model,
     prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds])
 
     # Get timesteps
-    model.scheduler.set_timesteps(num_inference_steps, device=device)
+    model.scheduler.set_timesteps(int(num_inference_steps), device=device)
     timesteps = model.scheduler.timesteps
 
     num_channels = model.unet.config.in_channels // 2
@@ -236,7 +237,7 @@ def sample_stage_2(model,
 
         # compute the previous noisy sample x_t -> x_t-1
        noisy_images = model.scheduler.step(
-            noise_pred, t, noisy_images, generator=generator, return_dict=False
+            noise_pred.to('cuda'), t, noisy_images.to('cuda'), generator=generator, return_dict=False
         )[0]
 
     # Return denoised images
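The int() casts matter because the step count arrives from the Gradio UI as a float, while diffusers schedulers expect an integer number of inference steps. A minimal sketch under that assumption, with DDPMScheduler standing in for the pipeline's actual scheduler:

import torch
from diffusers import DDPMScheduler

scheduler = DDPMScheduler()      # stand-in for model.scheduler
num_inference_steps = 50.0       # what gr.Number hands the callback

# Casting keeps the scheduler's timestep arithmetic on integers
scheduler.set_timesteps(int(num_inference_steps), device=torch.device("cpu"))
print(len(scheduler.timesteps))  # -> 50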