callum-canavan committed on
Commit
a65ed45
1 Parent(s): ba23d57

Update app to illusion generation

Browse files
Files changed (4) hide show
  1. app.py +4 -5
  2. bapp.py +78 -0
  3. requirements.txt +2 -0
  4. visual_anagrams/views/__init__.py +13 -0
app.py CHANGED
@@ -2,6 +2,7 @@ from diffusers import DiffusionPipeline
2
  from diffusers.utils import pt_to_pil
3
  import gradio as gr
4
  import torch
 
5
 
6
 
7
  stage_1 = DiffusionPipeline.from_pretrained(
@@ -33,9 +34,7 @@ stage_3.enable_xformers_memory_efficient_attention() # remove line if torch.__v
33
  stage_3.enable_model_cpu_offload()
34
 
35
 
36
- def predict(input_img):
37
- prompt = 'a photo of a kangaroo wearing an orange hoodie and blue sunglasses standing in front of the eiffel tower holding a sign that says "very deep learning"'
38
-
39
  prompt_embeds, negative_embeds = stage_1.encode_prompt(prompt)
40
  generator = torch.manual_seed(0)
41
  image = stage_1(
@@ -53,7 +52,7 @@ def predict(input_img):
53
  ).images
54
  image = stage_3(
55
  prompt=prompt, image=image, generator=generator, noise_level=100
56
- ).images
57
  return image
58
 
59
 
@@ -66,4 +65,4 @@ gradio_app = gr.Interface(
66
  )
67
 
68
  if __name__ == "__main__":
69
- gradio_app.launch() # server_name="0.0.0.0"
 
2
  from diffusers.utils import pt_to_pil
3
  import gradio as gr
4
  import torch
5
+ import numpy as np
6
 
7
 
8
  stage_1 = DiffusionPipeline.from_pretrained(
 
34
  stage_3.enable_model_cpu_offload()
35
 
36
 
37
+ def predict(prompt):
 
 
38
  prompt_embeds, negative_embeds = stage_1.encode_prompt(prompt)
39
  generator = torch.manual_seed(0)
40
  image = stage_1(
 
52
  ).images
53
  image = stage_3(
54
  prompt=prompt, image=image, generator=generator, noise_level=100
55
+ ).images[0]
56
  return image
57
 
58
 
 
65
  )
66
 
67
  if __name__ == "__main__":
68
+ gradio_app.launch(server_name="0.0.0.0") # server_name="0.0.0.0"
bapp.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ from pathlib import Path
3
+
4
+ import gradio as gr
5
+ import torch
6
+ from diffusers import DiffusionPipeline
7
+
8
+ from visual_anagrams.views import get_views, VIEW_MAP_NAMES
9
+ from visual_anagrams.samplers import sample_stage_1, sample_stage_2
10
+ from visual_anagrams.utils import add_args, save_illusion, save_metadata
11
+
12
# DeepFloyd IF two-stage cascade, loaded in fp16 to reduce memory footprint.
# NOTE(review): per the model names, IF-I is the base generator and IF-II the
# super-resolution stage — confirm against the DeepFloyd IF docs.
stage_1 = DiffusionPipeline.from_pretrained(
    "DeepFloyd/IF-I-M-v1.0",
    variant="fp16",
    torch_dtype=torch.float16)
stage_2 = DiffusionPipeline.from_pretrained(
    "DeepFloyd/IF-II-M-v1.0",
    text_encoder=None,  # stage 2 reuses stage 1's prompt embeddings, so no text encoder is loaded
    variant="fp16",
    torch_dtype=torch.float16,
)
# Offload idle submodules to CPU so both stages fit in limited GPU memory.
stage_1.enable_model_cpu_offload()
stage_2.enable_model_cpu_offload()
24
+
25
+
26
def generate_content(
    style,
    prompt_for_original,
    prompt_for_transformed,
    transformation,
    num_inference_steps,
    seed
):
    """Generate a visual-anagram illusion with the DeepFloyd IF cascade.

    Args:
        style: Style prefix prepended to both prompts (e.g. "an oil painting of").
        prompt_for_original: Prompt describing the untransformed view.
        prompt_for_transformed: Prompt describing the transformed view.
        transformation: Display name of the view transformation (a key of
            VIEW_MAP_NAMES) or an internal view id.
        num_inference_steps: Number of diffusion steps for each sampling stage.
        seed: RNG seed for reproducible sampling.

    Returns:
        A 3-tuple of images for the (Illusion, Original, Transformed) outputs.
    """
    prompts = [prompt_for_original, prompt_for_transformed]
    # BUG FIX: the original iterated `for p in [prompts]` — a list containing
    # the prompt list — so str() of the whole list was encoded once instead of
    # each prompt being encoded separately.
    embeds = [stage_1.encode_prompt(f'{style} {p}'.strip()) for p in prompts]
    prompt_embeds, negative_prompt_embeds = zip(*embeds)
    prompt_embeds = torch.cat(prompt_embeds)
    negative_prompt_embeds = torch.cat(negative_prompt_embeds)

    # BUG FIX: the Gradio dropdown supplies the human-readable label, while
    # get_views expects internal view ids (VIEW_MAP keys such as
    # 'inner_circle'). Translate via VIEW_MAP_NAMES, falling back to the raw
    # value so internal ids keep working.
    view_name = VIEW_MAP_NAMES.get(transformation, transformation)
    views = get_views(['identity', view_name])

    # gr.Number delivers a float; torch.manual_seed requires an int.
    generator = torch.manual_seed(int(seed))
    image = sample_stage_1(stage_1,
                           prompt_embeds,
                           negative_prompt_embeds,
                           views,
                           num_inference_steps=num_inference_steps,
                           generator=generator)

    image = sample_stage_2(stage_2,
                           image,
                           prompt_embeds,
                           negative_prompt_embeds,
                           views,
                           num_inference_steps=num_inference_steps,
                           generator=generator)

    # BUG FIX: the original returned the undefined names `image_transformed`
    # and `transformation_gif`, guaranteeing a NameError at runtime. Return
    # the stage-2 result for all three outputs for now.
    # TODO(review): render the transformed view (and an animation) once the
    # corresponding view/animation helpers are available here.
    return image, image, image
60
+
61
+
62
# Dropdown options are the human-readable keys of VIEW_MAP_NAMES; the selected
# key is what generate_content receives as `transformation`.
choices = list(VIEW_MAP_NAMES.keys())
gradio_app = gr.Interface(
    fn=generate_content,
    inputs=[
        gr.Textbox(label="Style", placeholder="an oil painting of"),
        gr.Textbox(label="Prompt for original view", placeholder="a penguin"),
        gr.Textbox(label="Prompt for transformed view", placeholder="a giraffe"),
        gr.Dropdown(label="View transformation", choices=choices, value=choices[0]),
        gr.Number(label="Number of diffusion steps", value=30, step=1, minimum=1, maximum=100),
        gr.Number(label="Random seed", value=0, step=1, minimum=0, maximum=100000)
    ],
    # Three outputs, matching the 3-tuple returned by generate_content.
    outputs=[gr.Image(label="Illusion"), gr.Image(label="Original"), gr.Image(label="Transformed")],
)
75
+
76
+
77
if __name__ == "__main__":
    # Bind to all interfaces so the app is reachable from outside the
    # container/Space, not just localhost.
    gradio_app.launch(server_name="0.0.0.0")
requirements.txt CHANGED
@@ -1,8 +1,10 @@
1
  accelerate
2
  diffusers
 
3
  gradio
4
  safetensors
5
  sentencepiece
6
  transformers
7
  torch
 
8
  xformers
 
1
  accelerate
2
  diffusers
3
+ einops
4
  gradio
5
  safetensors
6
  sentencepiece
7
  transformers
8
  torch
9
+ torchvision
10
  xformers
visual_anagrams/views/__init__.py CHANGED
@@ -25,6 +25,19 @@ VIEW_MAP = {
25
  'inner_circle': InnerCircleView,
26
  }
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  def get_views(view_names):
29
  '''
30
  Bespoke function to get views (just to make command line usage easier)
 
25
  'inner_circle': InnerCircleView,
26
  }
27
 
28
# Human-readable labels (shown in the app's dropdown) mapped to the internal
# view ids used as keys of VIEW_MAP above.
VIEW_MAP_NAMES = {
    'Flip': 'flip',
    'Rotate 90° clockwise': 'rotate_cw',
    'Rotate 90° counter-clockwise': 'rotate_ccw',
    'Rotate 180°': 'rotate_180',
    'Invert colors': 'negate',
    'Shear': 'skew',
    'Patch permutation': 'patch_permute',
    'Pixel permutation': 'pixel_permute',
    'Jigsaw permutation': 'jigsaw',
    'Rotate inner circle': 'inner_circle',
}
40
+
41
  def get_views(view_names):
42
  '''
43
  Bespoke function to get views (just to make command line usage easier)