patrickvonplaten committed on
Commit
e58dd86
1 Parent(s): 99ebd8b
control_net.py CHANGED
@@ -1,34 +1,28 @@
 #!/usr/bin/env python3
 import torch
-import numpy as np
 import os
 from huggingface_hub import HfApi
 from pathlib import Path
-import cv2
-from PIL import Image
 from diffusers.utils import load_image
+from controlnet_aux import CannyDetector

 from diffusers import (
     ControlNetModel,
     StableDiffusionControlNetPipeline,
     UniPCMultistepScheduler,
 )
+import sys
+
+checkpoint = sys.argv[1]
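+# usage (assumed): python control_net.py lllyasviel/sd-controlnet-canny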

 image = load_image(
-    "https://hf.co/datasets/huggingface/documentation-images/resolve/main/diffusers/input_image_vermeer.png"
+    "https://huggingface.co/lllyasviel/sd-controlnet-canny/resolve/main/images/bird.png"
 )

-image = np.array(image)
-
-low_threshold = 100
-high_threshold = 200
-
-image = cv2.Canny(image, low_threshold, high_threshold)
-image = image[:, :, None]
-image = np.concatenate([image, image, image], axis=2)
-canny_image = Image.fromarray(image)
+canny_detector = CannyDetector()
+canny_image = canny_detector(image, low_threshold=100, high_threshold=200)

-controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny", torch_dtype=torch.float16)
+controlnet = ControlNetModel.from_pretrained(checkpoint, torch_dtype=torch.float16)
 pipe = StableDiffusionControlNetPipeline.from_pretrained(
     "runwayml/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16
 )
@@ -36,8 +30,8 @@ pipe = StableDiffusionControlNetPipeline.from_pretrained(
 pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
 pipe.enable_model_cpu_offload()

-generator = torch.manual_seed(0)
-out_image = pipe("futuristic-looking woman", num_inference_steps=20, generator=generator, image=canny_image).images[0]
+generator = torch.manual_seed(33)
+out_image = pipe("a blue paradise bird in the jungle", num_inference_steps=20, generator=generator, image=canny_image).images[0]

 path = os.path.join(Path.home(), "images", "aa.png")
 out_image.save(path)
control_net_canny.py ADDED
@@ -0,0 +1,47 @@
+#!/usr/bin/env python3
+import torch
+import os
+from huggingface_hub import HfApi
+from pathlib import Path
+from diffusers.utils import load_image
+from controlnet_aux import CannyDetector
+
+from diffusers import (
+    ControlNetModel,
+    StableDiffusionControlNetPipeline,
+    UniPCMultistepScheduler,
+)
+import sys
+
+checkpoint = sys.argv[1]
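+# usage (assumed): python control_net_canny.py lllyasviel/sd-controlnet-canny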
+
+image = load_image(
+    "https://huggingface.co/lllyasviel/sd-controlnet-canny/resolve/main/images/bird.png"
+)
+
+canny_detector = CannyDetector()
+canny_image = canny_detector(image, low_threshold=100, high_threshold=200)
+
+controlnet = ControlNetModel.from_pretrained(checkpoint, torch_dtype=torch.float16)
+pipe = StableDiffusionControlNetPipeline.from_pretrained(
+    "runwayml/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16
+)
+
+pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
+pipe.enable_model_cpu_offload()
+
+generator = torch.manual_seed(33)
+out_image = pipe("a blue paradise bird in the jungle", num_inference_steps=20, generator=generator, image=canny_image).images[0]
+
+path = os.path.join(Path.home(), "images", "aa.png")
+out_image.save(path)
+
+api = HfApi()
+
+api.upload_file(
+    path_or_fileobj=path,
+    path_in_repo=path.split("/")[-1],
+    repo_id="patrickvonplaten/images",
+    repo_type="dataset",
+)
+print("https://huggingface.co/datasets/patrickvonplaten/images/blob/main/aa.png")
control_net_depth.py ADDED
@@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+import torch
+import os
+from huggingface_hub import HfApi
+from pathlib import Path
+from diffusers.utils import load_image
+from PIL import Image
+import numpy as np
+from transformers import pipeline
+
+from diffusers import (
+    ControlNetModel,
+    StableDiffusionControlNetPipeline,
+    UniPCMultistepScheduler,
+)
+import sys
+
+checkpoint = sys.argv[1]
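+# usage (assumed): python control_net_depth.py lllyasviel/sd-controlnet-depth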
+
+image = load_image("https://huggingface.co/lllyasviel/sd-controlnet-depth/resolve/main/images/stormtrooper.png")
+
+prompt = "Stormtrooper's lecture in beautiful lecture hall"
+
+
+depth_estimator = pipeline('depth-estimation')
+image = depth_estimator(image)['depth']
+image = np.array(image)
+image = image[:, :, None]
+image = np.concatenate([image, image, image], axis=2)
+image = Image.fromarray(image)
+
+controlnet = ControlNetModel.from_pretrained(checkpoint, torch_dtype=torch.float16)
+pipe = StableDiffusionControlNetPipeline.from_pretrained(
+    "runwayml/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16
+)
+
+pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
+pipe.enable_model_cpu_offload()
+
+generator = torch.manual_seed(0)
+out_image = pipe(prompt, num_inference_steps=40, generator=generator, image=image).images[0]
+
+path = os.path.join(Path.home(), "images", "aa.png")
+out_image.save(path)
+
+api = HfApi()
+
+api.upload_file(
+    path_or_fileobj=path,
+    path_in_repo=path.split("/")[-1],
+    repo_id="patrickvonplaten/images",
+    repo_type="dataset",
+)
+print("https://huggingface.co/datasets/patrickvonplaten/images/blob/main/aa.png")
control_net_mlsd.py ADDED
@@ -0,0 +1,49 @@
+#!/usr/bin/env python3
+import torch
+import os
+from huggingface_hub import HfApi
+from pathlib import Path
+from diffusers.utils import load_image
+from PIL import Image
+import numpy as np
+from controlnet_aux import MLSDdetector
+
+from diffusers import (
+    ControlNetModel,
+    StableDiffusionControlNetPipeline,
+    UniPCMultistepScheduler,
+)
+import sys
+
+checkpoint = sys.argv[1]
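+# usage (assumed): python control_net_mlsd.py lllyasviel/sd-controlnet-mlsd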
+
+image = load_image("https://huggingface.co/lllyasviel/sd-controlnet-mlsd/resolve/main/images/room.png")
+
+prompt = "royal chamber with fancy bed"
+
+mlsd = MLSDdetector.from_pretrained('lllyasviel/ControlNet')
+image = mlsd(image)
+
+controlnet = ControlNetModel.from_pretrained(checkpoint, torch_dtype=torch.float16)
+pipe = StableDiffusionControlNetPipeline.from_pretrained(
+    "runwayml/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16
+)
+
+pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
+pipe.enable_model_cpu_offload()
+
+generator = torch.manual_seed(0)
+out_image = pipe(prompt, num_inference_steps=30, generator=generator, image=image).images[0]
+
+path = os.path.join(Path.home(), "images", "aa.png")
+out_image.save(path)
+
+api = HfApi()
+
+api.upload_file(
+    path_or_fileobj=path,
+    path_in_repo=path.split("/")[-1],
+    repo_id="patrickvonplaten/images",
+    repo_type="dataset",
+)
+print("https://huggingface.co/datasets/patrickvonplaten/images/blob/main/aa.png")
control_net_open_pose.py ADDED
@@ -0,0 +1,47 @@
+#!/usr/bin/env python3
+import torch
+import os
+from huggingface_hub import HfApi
+from pathlib import Path
+from diffusers.utils import load_image
+from controlnet_aux import OpenposeDetector
+
+from diffusers import (
+    ControlNetModel,
+    StableDiffusionControlNetPipeline,
+    UniPCMultistepScheduler,
+)
+import sys
+
+checkpoint = sys.argv[1]
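+# usage (assumed): python control_net_open_pose.py lllyasviel/sd-controlnet-openpose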
+
+image = load_image("https://huggingface.co/lllyasviel/sd-controlnet-openpose/resolve/main/images/pose.png")
+prompt = "chef in the kitchen"
+
+
+openpose = OpenposeDetector.from_pretrained('lllyasviel/ControlNet')
+image = openpose(image)
+
+controlnet = ControlNetModel.from_pretrained(checkpoint, torch_dtype=torch.float16)
+pipe = StableDiffusionControlNetPipeline.from_pretrained(
+    "runwayml/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16
+)
+
+pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
+pipe.enable_model_cpu_offload()
+
+generator = torch.manual_seed(33)
+out_image = pipe(prompt, num_inference_steps=20, generator=generator, image=image).images[0]
+
+path = os.path.join(Path.home(), "images", "aa.png")
+out_image.save(path)
+
+api = HfApi()
+
+api.upload_file(
+    path_or_fileobj=path,
+    path_in_repo=path.split("/")[-1],
+    repo_id="patrickvonplaten/images",
+    repo_type="dataset",
+)
+print("https://huggingface.co/datasets/patrickvonplaten/images/blob/main/aa.png")
control_net_scribble.py ADDED
@@ -0,0 +1,49 @@
+#!/usr/bin/env python3
+import torch
+import os
+from huggingface_hub import HfApi
+from pathlib import Path
+from diffusers.utils import load_image
+from PIL import Image
+import numpy as np
+from controlnet_aux import HEDdetector
+
+from diffusers import (
+    ControlNetModel,
+    StableDiffusionControlNetPipeline,
+    UniPCMultistepScheduler,
+)
+import sys
+
+checkpoint = sys.argv[1]
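+# usage (assumed): python control_net_scribble.py lllyasviel/sd-controlnet-scribble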
+
+image = load_image("https://huggingface.co/lllyasviel/sd-controlnet-scribble/resolve/main/images/bag.png")
+
+prompt = "fancy Gucci bag on a table"
+
+hed = HEDdetector.from_pretrained('lllyasviel/ControlNet')
+image = hed(image, scribble=True)
+
+controlnet = ControlNetModel.from_pretrained(checkpoint, torch_dtype=torch.float16)
+pipe = StableDiffusionControlNetPipeline.from_pretrained(
+    "runwayml/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16
+)
+
+pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
+pipe.enable_model_cpu_offload()
+
+generator = torch.manual_seed(33)
+out_image = pipe(prompt, num_inference_steps=30, generator=generator, image=image).images[0]
+
+path = os.path.join(Path.home(), "images", "aa.png")
+out_image.save(path)
+
+api = HfApi()
+
+api.upload_file(
+    path_or_fileobj=path,
+    path_in_repo=path.split("/")[-1],
+    repo_id="patrickvonplaten/images",
+    repo_type="dataset",
+)
+print("https://huggingface.co/datasets/patrickvonplaten/images/blob/main/aa.png")
control_net_seg.py ADDED
@@ -0,0 +1,214 @@
+#!/usr/bin/env python3
+import torch
+import os
+from huggingface_hub import HfApi
+from pathlib import Path
+from diffusers.utils import load_image
+from transformers import AutoImageProcessor, UperNetForSemanticSegmentation
+from PIL import Image
+import numpy as np
+
+from diffusers import (
+    ControlNetModel,
+    StableDiffusionControlNetPipeline,
+    UniPCMultistepScheduler,
+)
+import sys
+
+image_processor = AutoImageProcessor.from_pretrained("openmmlab/upernet-convnext-small")
+image_segmentor = UperNetForSemanticSegmentation.from_pretrained("openmmlab/upernet-convnext-small")
+
+checkpoint = sys.argv[1]
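+# usage (assumed): python control_net_seg.py lllyasviel/sd-controlnet-seg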
+
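+# color palette for the segmentation classes (assumed: the standard ADE20K palette; one RGB triple per class)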
+ada_palette = np.asarray([
+    [0, 0, 0],
+    [120, 120, 120],
+    [180, 120, 120],
+    [6, 230, 230],
+    [80, 50, 50],
+    [4, 200, 3],
+    [120, 120, 80],
+    [140, 140, 140],
+    [204, 5, 255],
+    [230, 230, 230],
+    [4, 250, 7],
+    [224, 5, 255],
+    [235, 255, 7],
+    [150, 5, 61],
+    [120, 120, 70],
+    [8, 255, 51],
+    [255, 6, 82],
+    [143, 255, 140],
+    [204, 255, 4],
+    [255, 51, 7],
+    [204, 70, 3],
+    [0, 102, 200],
+    [61, 230, 250],
+    [255, 6, 51],
+    [11, 102, 255],
+    [255, 7, 71],
+    [255, 9, 224],
+    [9, 7, 230],
+    [220, 220, 220],
+    [255, 9, 92],
+    [112, 9, 255],
+    [8, 255, 214],
+    [7, 255, 224],
+    [255, 184, 6],
+    [10, 255, 71],
+    [255, 41, 10],
+    [7, 255, 255],
+    [224, 255, 8],
+    [102, 8, 255],
+    [255, 61, 6],
+    [255, 194, 7],
+    [255, 122, 8],
+    [0, 255, 20],
+    [255, 8, 41],
+    [255, 5, 153],
+    [6, 51, 255],
+    [235, 12, 255],
+    [160, 150, 20],
+    [0, 163, 255],
+    [140, 140, 140],
+    [250, 10, 15],
+    [20, 255, 0],
+    [31, 255, 0],
+    [255, 31, 0],
+    [255, 224, 0],
+    [153, 255, 0],
+    [0, 0, 255],
+    [255, 71, 0],
+    [0, 235, 255],
+    [0, 173, 255],
+    [31, 0, 255],
+    [11, 200, 200],
+    [255, 82, 0],
+    [0, 255, 245],
+    [0, 61, 255],
+    [0, 255, 112],
+    [0, 255, 133],
+    [255, 0, 0],
+    [255, 163, 0],
+    [255, 102, 0],
+    [194, 255, 0],
+    [0, 143, 255],
+    [51, 255, 0],
+    [0, 82, 255],
+    [0, 255, 41],
+    [0, 255, 173],
+    [10, 0, 255],
+    [173, 255, 0],
+    [0, 255, 153],
+    [255, 92, 0],
+    [255, 0, 255],
+    [255, 0, 245],
+    [255, 0, 102],
+    [255, 173, 0],
+    [255, 0, 20],
+    [255, 184, 184],
+    [0, 31, 255],
+    [0, 255, 61],
+    [0, 71, 255],
+    [255, 0, 204],
+    [0, 255, 194],
+    [0, 255, 82],
+    [0, 10, 255],
+    [0, 112, 255],
+    [51, 0, 255],
+    [0, 194, 255],
+    [0, 122, 255],
+    [0, 255, 163],
+    [255, 153, 0],
+    [0, 255, 10],
+    [255, 112, 0],
+    [143, 255, 0],
+    [82, 0, 255],
+    [163, 255, 0],
+    [255, 235, 0],
+    [8, 184, 170],
+    [133, 0, 255],
+    [0, 255, 92],
+    [184, 0, 255],
+    [255, 0, 31],
+    [0, 184, 255],
+    [0, 214, 255],
+    [255, 0, 112],
+    [92, 255, 0],
+    [0, 224, 255],
+    [112, 224, 255],
+    [70, 184, 160],
+    [163, 0, 255],
+    [153, 0, 255],
+    [71, 255, 0],
+    [255, 0, 163],
+    [255, 204, 0],
+    [255, 0, 143],
+    [0, 255, 235],
+    [133, 255, 0],
+    [255, 0, 235],
+    [245, 0, 255],
+    [255, 0, 122],
+    [255, 245, 0],
+    [10, 190, 212],
+    [214, 255, 0],
+    [0, 204, 255],
+    [20, 0, 255],
+    [255, 255, 0],
+    [0, 153, 255],
+    [0, 41, 255],
+    [0, 255, 204],
+    [41, 0, 255],
+    [41, 255, 0],
+    [173, 0, 255],
+    [0, 245, 255],
+    [71, 0, 255],
+    [122, 0, 255],
+    [0, 255, 184],
+    [0, 92, 255],
+    [184, 255, 0],
+    [0, 133, 255],
+    [255, 214, 0],
+    [25, 194, 194],
+    [102, 255, 0],
+    [92, 0, 255],
+])
+
+image = load_image("https://huggingface.co/lllyasviel/sd-controlnet-seg/resolve/main/images/house.png").convert('RGB')
+
+prompt = "old house in stormy weather with rain and wind"
+
+pixel_values = image_processor(image, return_tensors="pt").pixel_values
+with torch.no_grad():
+    outputs = image_segmentor(pixel_values)
+seg = image_processor.post_process_semantic_segmentation(outputs, target_sizes=[image.size[::-1]])[0]
+color_seg = np.zeros((seg.shape[0], seg.shape[1], 3), dtype=np.uint8)  # height, width, 3
+for label, color in enumerate(ada_palette):
+    color_seg[seg == label, :] = color
+color_seg = color_seg.astype(np.uint8)
+image = Image.fromarray(color_seg)
+
+
+controlnet = ControlNetModel.from_pretrained(checkpoint, torch_dtype=torch.float16)
+pipe = StableDiffusionControlNetPipeline.from_pretrained(
+    "runwayml/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16
+)
+
+pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
+pipe.enable_model_cpu_offload()
+
+generator = torch.manual_seed(0)
+out_image = pipe(prompt, num_inference_steps=30, generator=generator, image=image).images[0]
+
+path = os.path.join(Path.home(), "images", "aa.png")
+out_image.save(path)
+
+api = HfApi()
+
+api.upload_file(
+    path_or_fileobj=path,
+    path_in_repo=path.split("/")[-1],
+    repo_id="patrickvonplaten/images",
+    repo_type="dataset",
+)
+print("https://huggingface.co/datasets/patrickvonplaten/images/blob/main/aa.png")
run_decomposed_if.py ADDED
@@ -0,0 +1,80 @@
+#!/usr/bin/env python3
+from diffusers import IFBasePipeline, IFSuperResolutionPipeline, UNet2DConditionModel
+from transformers import T5EncoderModel, T5Tokenizer
+import torch
+import gc
+import os
+from pathlib import Path
+
+prompt = 'a photo of a kangaroo wearing an orange hoodie and blue sunglasses standing in front of the eiffel tower holding a sign that says "very deep learning"'
+
+model_id = "diffusers/if"
+model_id = "/home/patrick/if"
+
+# T5
+t5_tok = T5Tokenizer.from_pretrained(model_id, subfolder="tokenizer", torch_dtype=torch.float16, variant="fp16", use_safetensors=True)
+
+t5 = T5EncoderModel.from_pretrained(model_id, subfolder="text_encoder", torch_dtype=torch.float16, variant="fp16", low_cpu_mem_usage=True)
+t5.cuda()
+
+prompt = prompt.lower().strip()  # make sure everything is lower-cased
+with torch.no_grad():
+    inputs = t5_tok(prompt, max_length=77, return_tensors="pt", truncation=True, padding="max_length").to("cuda")
+    prompt_embeds = t5(**inputs).last_hidden_state
+
+with torch.no_grad():
+    inputs = t5_tok("", max_length=77, return_tensors="pt", truncation=True, padding="max_length").to("cuda")
+    neg_prompt_embeds = t5(**inputs).last_hidden_state
+
+del t5
+torch.cuda.empty_cache()
+gc.collect()
+
+generator = torch.Generator("cuda").manual_seed(0)
+
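+# three-stage cascade (assumed to follow the standard DeepFloyd IF design):
+# the base pipeline generates a small 64x64 image, which the two
+# super-resolution stages then progressively upsample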
+# Stage 1
+pipe = IFBasePipeline.from_pretrained(model_id, text_encoder=None, torch_dtype=torch.float16, variant="fp16")
+pipe.to("cuda")
+
+image = pipe(prompt_embeds=prompt_embeds, negative_prompt_embeds=neg_prompt_embeds, output_type="pt", num_inference_steps=100, generator=generator).images
+
+# save_image
+pil_image = pipe.numpy_to_pil(pipe.decode_latents(image))[0]
+pil_image.save(os.path.join(Path.home(), "images", "if_I_0.png"))
+
+# offload
+del pipe
+torch.cuda.empty_cache()
+gc.collect()
+
+# Stage 2
+unet = UNet2DConditionModel.from_pretrained(model_id, subfolder="super_res_1_unet", torch_dtype=torch.float16)
+pipe = IFSuperResolutionPipeline.from_pretrained(model_id, unet=unet, text_encoder=None, torch_dtype=torch.float16, variant="fp16")
+pipe.to("cuda")
+
+image = pipe(image=image, prompt_embeds=prompt_embeds, negative_prompt_embeds=neg_prompt_embeds, num_inference_steps=50, noise_level=250, output_type="pt", generator=generator).images
+
+# save_image
+pil_image = pipe.numpy_to_pil(pipe.decode_latents(image))[0]
+pil_image.save(os.path.join(Path.home(), "images", "if_II_0.png"))
+
+# offload
+del pipe
+torch.cuda.empty_cache()
+gc.collect()
+
+# Stage 3
+unet = UNet2DConditionModel.from_pretrained(model_id, subfolder="super_res_2_unet", torch_dtype=torch.float16)
+pipe = IFSuperResolutionPipeline.from_pretrained(model_id, unet=unet, text_encoder=None, torch_dtype=torch.float16, variant="fp16")
+pipe.to("cuda")
+
+image = pipe(image=image, prompt_embeds=prompt_embeds, negative_prompt_embeds=neg_prompt_embeds, num_inference_steps=40, noise_level=0, output_type="pt", generator=generator).images
+
+# save image
+pil_image = pipe.numpy_to_pil(pipe.decode_latents(image))[0]
+pil_image.save(os.path.join(Path.home(), "images", "if_III_0.png"))
+
+# offload
+del pipe
+torch.cuda.empty_cache()
+gc.collect()
run_deepfloyd.py ADDED
@@ -0,0 +1,56 @@
+#!/usr/bin/env python3
+from diffusers import DiffusionPipeline, DDPMScheduler
+import torch
+import time
+import os
+from pathlib import Path
+from huggingface_hub import HfApi
+import random
+import numpy as np
+from deepfloyd_if.modules import IFStageI, IFStageII, IFStageIII, T5Embedder
+import sys
+
+api = HfApi()
+start_time = time.time()
+seed = 0
+use_diffusers = bool(int(sys.argv[1]))
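+# usage (assumed): python run_deepfloyd.py 1 -> diffusers port; python run_deepfloyd.py 0 -> reference deepfloyd_if implementation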
+
+t5_pos_embeds = torch.load("/home/patrick/tensors/embeds_orig.pt").to("cuda")
+t5_neg_embeds = torch.load("/home/patrick/tensors/neg_embeds.pt").to("cuda")
+
+def seed_everything(seed=None):
+    random.seed(seed)
+    os.environ['PYTHONHASHSEED'] = str(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed(seed)
+    torch.backends.cudnn.deterministic = True
+    torch.backends.cudnn.benchmark = True
+    return seed
+
+if use_diffusers:
+    pipe = DiffusionPipeline.from_pretrained("/home/patrick/if-diff-ckpts/IF-I-IF-v1.0", torch_dtype=torch.float32, use_safetensors=True, text_encoder=None, safety_checker=None)
+    config = dict(pipe.scheduler.config)
+    config["timestep_spacing"] = "even_border"
+    pipe.scheduler = DDPMScheduler.from_config(config)
+    pipe.to("cuda")
+
+    with torch.no_grad():
+        # text_embeddings = t5.get_text_embeddings([prompt])
+        seed_everything(0)
+        out_image = pipe(prompt_embeds=t5_pos_embeds, negative_prompt_embeds=t5_neg_embeds, num_inference_steps=5).images[0]
+        out_image.save("/home/patrick/images/if_diff.png")
+else:
+    if_I = IFStageI(device="cuda", dir_or_name="/home/patrick/IF-I-IF-v1.0/", model_kwargs={"precision": "fp32"})
+    if_I_kwargs = {}
+    if_I_kwargs['negative_t5_embs'] = t5_neg_embeds
+    if_I_kwargs['seed'] = seed
+    if_I_kwargs['t5_embs'] = t5_pos_embeds
+    if_I_kwargs['aspect_ratio'] = "1:1"
+    if_I_kwargs['progress'] = True
+    if_I_kwargs['sample_timestep_respacing'] = '5'
+
+    seed_everything(0)
+    stageI_generations, _ = if_I.embeddings_to_image(**if_I_kwargs)
+
+    if_I.to_images(stageI_generations)[0].save("/home/patrick/images/if_ref.png")
run_if.py ADDED
@@ -0,0 +1,30 @@
+#!/usr/bin/env python3
+from diffusers import DiffusionPipeline
+import torch
+import time
+import os
+from pathlib import Path
+from huggingface_hub import HfApi
+
+api = HfApi()
+start_time = time.time()
+
+pipe = DiffusionPipeline.from_pretrained("/home/patrick/if", torch_dtype=torch.float16, variant="fp16", use_safetensors=True)
+pipe.enable_model_cpu_offload()
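+# model CPU offload keeps each sub-model on CPU and moves it to the GPU only while it runs, reducing peak VRAM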
+
+generator = torch.Generator("cuda").manual_seed(0)
+prompt = 'a photo of a kangaroo wearing an orange hoodie and blue sunglasses standing in front of the eiffel tower holding a sign that says "very deep learning"'
+
+image = pipe(prompt, generator=generator).images[0]
+
+path = os.path.join(Path.home(), "images", "if.png")
+image.save(path)
+
+api.upload_file(
+    path_or_fileobj=path,
+    path_in_repo=path.split("/")[-1],
+    repo_id="patrickvonplaten/images",
+    repo_type="dataset",
+)
+print("https://huggingface.co/datasets/patrickvonplaten/images/blob/main/if.png")
run_if_orig.py ADDED
@@ -0,0 +1,7 @@
+#!/usr/bin/env python3
+from deepfloyd_if.modules import IFStageI
+from time import time
+
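+# rough benchmark: measure how long loading the stage-I reference checkpoint takes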
+start_time = time()
+model = IFStageI(dir_or_name="/home/patrick/IF-I-IF-v1.0/", device="cuda")
+print("Time", time() - start_time)
run_if_stages.py ADDED
@@ -0,0 +1,44 @@
+#!/usr/bin/env python3
+from diffusers import DiffusionPipeline
+import torch
+import time
+import os
+from pathlib import Path
+from huggingface_hub import HfApi
+
+api = HfApi()
+start_time = time.time()
+
+model_prefix = "diffusers"
+
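+# load all three IF stages (assumed cascade: base text-to-image, then two super-resolution pipelines)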
+pipe = DiffusionPipeline.from_pretrained(f"{model_prefix}/IF-I-IF-v1.0", torch_dtype=torch.float16, safety_checker=None, variant="fp16", use_safetensors=True)
+pipe.enable_model_cpu_offload()
+
+super_res_1_pipe = DiffusionPipeline.from_pretrained(f"{model_prefix}/IF-II-L-v1.0", text_encoder=None, safety_checker=None, torch_dtype=torch.float16, variant="fp16", use_safetensors=True)
+super_res_1_pipe.enable_model_cpu_offload()
+
+super_res_2_pipe = DiffusionPipeline.from_pretrained(f"{model_prefix}/IF-III-L-v1.0", text_encoder=None, safety_checker=None, torch_dtype=torch.float16, variant="fp16", use_safetensors=True)
+super_res_2_pipe.enable_model_cpu_offload()
+
+prompt = 'a photo of a kangaroo wearing an orange hoodie and blue sunglasses standing in front of the eiffel tower holding a sign that says "very deep learning"'
+
+generator = torch.Generator("cuda").manual_seed(0)
+prompt_embeds, negative_embeds = pipe.encode_prompt(prompt)
+
+image = pipe(prompt_embeds=prompt_embeds, negative_prompt_embeds=negative_embeds, generator=generator, output_type="pt").images
+
+# save_image
+pil_image = pipe.numpy_to_pil(pipe.decode_latents(image))[0]
+pil_image.save(os.path.join(Path.home(), "images", "if_stage_I_0.png"))
+
+image = super_res_1_pipe(image=image, prompt_embeds=prompt_embeds, negative_prompt_embeds=negative_embeds, generator=generator, output_type="pt", noise_level=250, num_inference_steps=50).images
+
+# save_image
+pil_image = pipe.numpy_to_pil(pipe.decode_latents(image))[0]
+pil_image.save(os.path.join(Path.home(), "images", "if_stage_II_0.png"))
+
+image = super_res_2_pipe(image=image, prompt_embeds=prompt_embeds, negative_prompt_embeds=negative_embeds, noise_level=0, num_inference_steps=40, generator=generator).images[0]
+
+# save_image
+image.save(os.path.join(Path.home(), "images", "if_stage_III_0.png"))