Update README.md
#3
by
YiYiXu
- opened
README.md
CHANGED
@@ -25,38 +25,23 @@ pip install diffusers transformers
|
|
25 |
from diffusers import KandinskyPipeline, KandinskyPriorPipeline
|
26 |
import torch
|
27 |
|
|
|
|
|
28 |
|
29 |
-
pipe_prior =
|
30 |
pipe_prior.to("cuda")
|
31 |
|
|
|
|
|
|
|
32 |
prompt = "A alien cheeseburger creature eating itself, claymation, cinematic, moody lighting"
|
33 |
negative_prompt = "low quality, bad quality"
|
34 |
|
35 |
-
|
36 |
-
|
37 |
-
).images
|
38 |
-
|
39 |
-
zero_image_emb = pipe_prior(
|
40 |
-
negative_prompt, guidance_scale=1.0, num_inference_steps=25, generator=generator, negative_prompt=negative_prompt
|
41 |
-
).images
|
42 |
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
images = pipe(
|
48 |
-
prompt,
|
49 |
-
image_embeds=image_emb,
|
50 |
-
negative_image_embeds=zero_image_emb,
|
51 |
-
num_images_per_prompt=2,
|
52 |
-
height=768,
|
53 |
-
width=768,
|
54 |
-
num_inference_steps=100,
|
55 |
-
guidance_scale=4.0,
|
56 |
-
generator=generator,
|
57 |
-
).images[0]
|
58 |
-
|
59 |
-
image.save("./cheeseburger_monster.png")
|
60 |
```
|
61 |
|
62 |
![img](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/kandinsky-docs/cheeseburger.png)
|
@@ -78,7 +63,9 @@ original_image = Image.open(BytesIO(response.content)).convert("RGB")
|
|
78 |
original_image = original_image.resize((768, 512))
|
79 |
|
80 |
# create prior
|
81 |
-
pipe_prior = KandinskyPriorPipeline.from_pretrained(
|
|
|
|
|
82 |
pipe_prior.to("cuda")
|
83 |
|
84 |
# create img2img pipeline
|
@@ -88,22 +75,16 @@ pipe.to("cuda")
|
|
88 |
prompt = "A fantasy landscape, Cinematic lighting"
|
89 |
negative_prompt = "low quality, bad quality"
|
90 |
|
91 |
-
|
92 |
-
|
93 |
-
).images
|
94 |
-
|
95 |
-
zero_image_emb = pipe_prior(
|
96 |
-
negative_prompt, guidance_scale=4.0, num_inference_steps=25, generator=generator, negative_prompt=negative_prompt
|
97 |
-
).images
|
98 |
|
99 |
out = pipe(
|
100 |
prompt,
|
101 |
image=original_image,
|
102 |
-
image_embeds=
|
103 |
-
negative_image_embeds=
|
104 |
height=768,
|
105 |
width=768,
|
106 |
-
num_inference_steps=500,
|
107 |
strength=0.3,
|
108 |
)
|
109 |
|
@@ -121,11 +102,13 @@ from diffusers.utils import load_image
|
|
121 |
import torch
|
122 |
import numpy as np
|
123 |
|
124 |
-
pipe_prior = KandinskyPriorPipeline.from_pretrained(
|
|
|
|
|
125 |
pipe_prior.to("cuda")
|
126 |
|
127 |
prompt = "a hat"
|
128 |
-
|
129 |
|
130 |
pipe = KandinskyInpaintPipeline.from_pretrained("kandinsky-community/kandinsky-2-1-inpaint", torch_dtype=torch.float16)
|
131 |
pipe.to("cuda")
|
@@ -135,14 +118,14 @@ init_image = load_image(
|
|
135 |
)
|
136 |
|
137 |
mask = np.ones((768, 768), dtype=np.float32)
|
|
|
138 |
mask[:250, 250:-250] = 0
|
139 |
|
140 |
out = pipe(
|
141 |
prompt,
|
142 |
image=init_image,
|
143 |
mask_image=mask,
|
144 |
-
|
145 |
-
negative_image_embeds=zero_image_emb,
|
146 |
height=768,
|
147 |
width=768,
|
148 |
num_inference_steps=150,
|
@@ -162,9 +145,10 @@ from diffusers.utils import load_image
|
|
162 |
import PIL
|
163 |
|
164 |
import torch
|
165 |
-
from torchvision import transforms
|
166 |
|
167 |
-
pipe_prior = KandinskyPriorPipeline.from_pretrained(
|
|
|
|
|
168 |
pipe_prior.to("cuda")
|
169 |
|
170 |
img1 = load_image(
|
@@ -175,16 +159,20 @@ img2 = load_image(
|
|
175 |
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" "/kandinsky/starry_night.jpeg"
|
176 |
)
|
177 |
|
|
|
178 |
images_texts = ["a cat", img1, img2]
|
|
|
|
|
179 |
weights = [0.3, 0.3, 0.4]
|
180 |
-
|
|
|
|
|
|
|
181 |
|
182 |
pipe = KandinskyPipeline.from_pretrained("kandinsky-community/kandinsky-2-1", torch_dtype=torch.float16)
|
183 |
pipe.to("cuda")
|
184 |
|
185 |
-
image = pipe(
|
186 |
-
"", image_embeds=image_emb, negative_image_embeds=zero_image_emb, height=768, width=768, num_inference_steps=150
|
187 |
-
).images[0]
|
188 |
|
189 |
image.save("starry_cat.png")
|
190 |
```
|
|
|
25 |
from diffusers import KandinskyPipeline, KandinskyPriorPipeline
|
26 |
import torch
|
27 |
|
28 |
+
from diffusers import DiffusionPipeline
|
29 |
+
import torch
|
30 |
|
31 |
+
pipe_prior = DiffusionPipeline.from_pretrained("kandinsky-community/kandinsky-2-1-prior", torch_dtype=torch.float16)
|
32 |
pipe_prior.to("cuda")
|
33 |
|
34 |
+
t2i_pipe = DiffusionPipeline.from_pretrained("kandinsky-community/kandinsky-2-1", torch_dtype=torch.float16)
|
35 |
+
t2i_pipe.to("cuda")
|
36 |
+
|
37 |
prompt = "A alien cheeseburger creature eating itself, claymation, cinematic, moody lighting"
|
38 |
negative_prompt = "low quality, bad quality"
|
39 |
|
40 |
+
generator = torch.Generator(device="cuda").manual_seed(12)
|
41 |
+
image_embeds, negative_image_embeds = pipe_prior(prompt, negative_prompt, generator=generator).to_tuple()
|
|
|
|
|
|
|
|
|
|
|
42 |
|
43 |
+
image = t2i_pipe(prompt, image_embeds=image_embeds, negative_image_embeds=negative_image_embeds).images[0]
|
44 |
+
image.save("cheeseburger_monster.png")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
```
|
46 |
|
47 |
![img](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/kandinsky-docs/cheeseburger.png)
|
|
|
63 |
original_image = original_image.resize((768, 512))
|
64 |
|
65 |
# create prior
|
66 |
+
pipe_prior = KandinskyPriorPipeline.from_pretrained(
|
67 |
+
"kandinsky-community/kandinsky-2-1-prior", torch_dtype=torch.float16
|
68 |
+
)
|
69 |
pipe_prior.to("cuda")
|
70 |
|
71 |
# create img2img pipeline
|
|
|
75 |
prompt = "A fantasy landscape, Cinematic lighting"
|
76 |
negative_prompt = "low quality, bad quality"
|
77 |
|
78 |
+
generator = torch.Generator(device="cuda").manual_seed(30)
|
79 |
+
image_embeds, negative_image_embeds = pipe_prior(prompt, negative_prompt, generator=generator).to_tuple()
|
|
|
|
|
|
|
|
|
|
|
80 |
|
81 |
out = pipe(
|
82 |
prompt,
|
83 |
image=original_image,
|
84 |
+
image_embeds=image_embeds,
|
85 |
+
negative_image_embeds=negative_image_embeds,
|
86 |
height=768,
|
87 |
width=768,
|
|
|
88 |
strength=0.3,
|
89 |
)
|
90 |
|
|
|
102 |
import torch
|
103 |
import numpy as np
|
104 |
|
105 |
+
pipe_prior = KandinskyPriorPipeline.from_pretrained(
|
106 |
+
"kandinsky-community/kandinsky-2-1-prior", torch_dtype=torch.float16
|
107 |
+
)
|
108 |
pipe_prior.to("cuda")
|
109 |
|
110 |
prompt = "a hat"
|
111 |
+
prior_output = pipe_prior(prompt)
|
112 |
|
113 |
pipe = KandinskyInpaintPipeline.from_pretrained("kandinsky-community/kandinsky-2-1-inpaint", torch_dtype=torch.float16)
|
114 |
pipe.to("cuda")
|
|
|
118 |
)
|
119 |
|
120 |
mask = np.ones((768, 768), dtype=np.float32)
|
121 |
+
# Let's mask out an area above the cat's head
|
122 |
mask[:250, 250:-250] = 0
|
123 |
|
124 |
out = pipe(
|
125 |
prompt,
|
126 |
image=init_image,
|
127 |
mask_image=mask,
|
128 |
+
**prior_output,
|
|
|
129 |
height=768,
|
130 |
width=768,
|
131 |
num_inference_steps=150,
|
|
|
145 |
import PIL
|
146 |
|
147 |
import torch
|
|
|
148 |
|
149 |
+
pipe_prior = KandinskyPriorPipeline.from_pretrained(
|
150 |
+
"kandinsky-community/kandinsky-2-1-prior", torch_dtype=torch.float16
|
151 |
+
)
|
152 |
pipe_prior.to("cuda")
|
153 |
|
154 |
img1 = load_image(
|
|
|
159 |
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" "/kandinsky/starry_night.jpeg"
|
160 |
)
|
161 |
|
162 |
+
# add all the conditions we want to interpolate; each can be either text or an image
|
163 |
images_texts = ["a cat", img1, img2]
|
164 |
+
|
165 |
+
# specify the weights for each condition in images_texts
|
166 |
weights = [0.3, 0.3, 0.4]
|
167 |
+
|
168 |
+
# We can leave the prompt empty
|
169 |
+
prompt = ""
|
170 |
+
prior_out = pipe_prior.interpolate(images_texts, weights)
|
171 |
|
172 |
pipe = KandinskyPipeline.from_pretrained("kandinsky-community/kandinsky-2-1", torch_dtype=torch.float16)
|
173 |
pipe.to("cuda")
|
174 |
|
175 |
+
image = pipe(prompt, **prior_out, height=768, width=768).images[0]
|
|
|
|
|
176 |
|
177 |
image.save("starry_cat.png")
|
178 |
```
|