Files changed (1) hide show
  1. README.md +34 -46
README.md CHANGED
@@ -25,38 +25,23 @@ pip install diffusers transformers
25
  from diffusers import KandinskyPipeline, KandinskyPriorPipeline
26
  import torch
27
 
 
 
28
 
29
- pipe_prior = KandinskyPriorPipeline.from_pretrained("kandinsky-community/kandinsky-2-1-prior", torch_dtype=torch.float16)
30
  pipe_prior.to("cuda")
31
 
 
 
 
32
  prompt = "A alien cheeseburger creature eating itself, claymation, cinematic, moody lighting"
33
  negative_prompt = "low quality, bad quality"
34
 
35
- image_emb = pipe_prior(
36
- prompt, guidance_scale=1.0, num_inference_steps=25, generator=generator, negative_prompt=negative_prompt
37
- ).images
38
-
39
- zero_image_emb = pipe_prior(
40
- negative_prompt, guidance_scale=1.0, num_inference_steps=25, generator=generator, negative_prompt=negative_prompt
41
- ).images
42
 
43
- pipe = KandinskyPipeline.from_pretrained("kandinsky-community/kandinsky-2-1", torch_dtype=torch.float16)
44
- pipe.to("cuda")
45
-
46
-
47
- images = pipe(
48
- prompt,
49
- image_embeds=image_emb,
50
- negative_image_embeds=zero_image_emb,
51
- num_images_per_prompt=2,
52
- height=768,
53
- width=768,
54
- num_inference_steps=100,
55
- guidance_scale=4.0,
56
- generator=generator,
57
- ).images[0]
58
-
59
- image.save("./cheeseburger_monster.png")
60
  ```
61
 
62
  ![img](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/kandinsky-docs/cheeseburger.png)
@@ -78,7 +63,9 @@ original_image = Image.open(BytesIO(response.content)).convert("RGB")
78
  original_image = original_image.resize((768, 512))
79
 
80
  # create prior
81
- pipe_prior = KandinskyPriorPipeline.from_pretrained("kandinsky-community/kandinsky-2-1-prior", torch_dtype=torch.float16)
 
 
82
  pipe_prior.to("cuda")
83
 
84
  # create img2img pipeline
@@ -88,22 +75,16 @@ pipe.to("cuda")
88
  prompt = "A fantasy landscape, Cinematic lighting"
89
  negative_prompt = "low quality, bad quality"
90
 
91
- image_emb = pipe_prior(
92
- prompt, guidance_scale=4.0, num_inference_steps=25, generator=generator, negative_prompt=negative_prompt
93
- ).images
94
-
95
- zero_image_emb = pipe_prior(
96
- negative_prompt, guidance_scale=4.0, num_inference_steps=25, generator=generator, negative_prompt=negative_prompt
97
- ).images
98
 
99
  out = pipe(
100
  prompt,
101
  image=original_image,
102
- image_embeds=image_emb,
103
- negative_image_embeds=zero_image_emb,
104
  height=768,
105
  width=768,
106
- num_inference_steps=500,
107
  strength=0.3,
108
  )
109
 
@@ -121,11 +102,13 @@ from diffusers.utils import load_image
121
  import torch
122
  import numpy as np
123
 
124
- pipe_prior = KandinskyPriorPipeline.from_pretrained("kandinsky-community/kandinsky-2-1-prior", torch_dtype=torch.float16)
 
 
125
  pipe_prior.to("cuda")
126
 
127
  prompt = "a hat"
128
- image_emb, zero_image_emb = pipe_prior(prompt, return_dict=False)
129
 
130
  pipe = KandinskyInpaintPipeline.from_pretrained("kandinsky-community/kandinsky-2-1-inpaint", torch_dtype=torch.float16)
131
  pipe.to("cuda")
@@ -135,14 +118,14 @@ init_image = load_image(
135
  )
136
 
137
  mask = np.ones((768, 768), dtype=np.float32)
 
138
  mask[:250, 250:-250] = 0
139
 
140
  out = pipe(
141
  prompt,
142
  image=init_image,
143
  mask_image=mask,
144
- image_embeds=image_emb,
145
- negative_image_embeds=zero_image_emb,
146
  height=768,
147
  width=768,
148
  num_inference_steps=150,
@@ -162,9 +145,10 @@ from diffusers.utils import load_image
162
  import PIL
163
 
164
  import torch
165
- from torchvision import transforms
166
 
167
- pipe_prior = KandinskyPriorPipeline.from_pretrained("kandinsky-community/kandinsky-2-1-prior", torch_dtype=torch.float16)
 
 
168
  pipe_prior.to("cuda")
169
 
170
  img1 = load_image(
@@ -175,16 +159,20 @@ img2 = load_image(
175
  "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" "/kandinsky/starry_night.jpeg"
176
  )
177
 
 
178
  images_texts = ["a cat", img1, img2]
 
 
179
  weights = [0.3, 0.3, 0.4]
180
- image_emb, zero_image_emb = pipe_prior.interpolate(images_texts, weights)
 
 
 
181
 
182
  pipe = KandinskyPipeline.from_pretrained("kandinsky-community/kandinsky-2-1", torch_dtype=torch.float16)
183
  pipe.to("cuda")
184
 
185
- image = pipe(
186
- "", image_embeds=image_emb, negative_image_embeds=zero_image_emb, height=768, width=768, num_inference_steps=150
187
- ).images[0]
188
 
189
  image.save("starry_cat.png")
190
  ```
 
25
  from diffusers import KandinskyPipeline, KandinskyPriorPipeline
26
  import torch
27
 
28
+ from diffusers import DiffusionPipeline
29
+ import torch
30
 
31
+ pipe_prior = DiffusionPipeline.from_pretrained("kandinsky-community/kandinsky-2-1-prior", torch_dtype=torch.float16)
32
  pipe_prior.to("cuda")
33
 
34
+ t2i_pipe = DiffusionPipeline.from_pretrained("kandinsky-community/kandinsky-2-1", torch_dtype=torch.float16)
35
+ t2i_pipe.to("cuda")
36
+
37
  prompt = "A alien cheeseburger creature eating itself, claymation, cinematic, moody lighting"
38
  negative_prompt = "low quality, bad quality"
39
 
40
+ generator = torch.Generator(device="cuda").manual_seed(12)
41
+ image_embeds, negative_image_embeds = pipe_prior(prompt, negative_prompt, generator=generator).to_tuple()
 
 
 
 
 
42
 
43
+ image = t2i_pipe(prompt, image_embeds=image_embeds, negative_image_embeds=negative_image_embeds).images[0]
44
+ image.save("cheeseburger_monster.png")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  ```
46
 
47
  ![img](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/kandinsky-docs/cheeseburger.png)
 
63
  original_image = original_image.resize((768, 512))
64
 
65
  # create prior
66
+ pipe_prior = KandinskyPriorPipeline.from_pretrained(
67
+ "kandinsky-community/kandinsky-2-1-prior", torch_dtype=torch.float16
68
+ )
69
  pipe_prior.to("cuda")
70
 
71
  # create img2img pipeline
 
75
  prompt = "A fantasy landscape, Cinematic lighting"
76
  negative_prompt = "low quality, bad quality"
77
 
78
+ generator = torch.Generator(device="cuda").manual_seed(30)
79
+ image_embeds, negative_image_embeds = pipe_prior(prompt, negative_prompt, generator=generator).to_tuple()
 
 
 
 
 
80
 
81
  out = pipe(
82
  prompt,
83
  image=original_image,
84
+ image_embeds=image_embeds,
85
+ negative_image_embeds=negative_image_embeds,
86
  height=768,
87
  width=768,
 
88
  strength=0.3,
89
  )
90
 
 
102
  import torch
103
  import numpy as np
104
 
105
+ pipe_prior = KandinskyPriorPipeline.from_pretrained(
106
+ "kandinsky-community/kandinsky-2-1-prior", torch_dtype=torch.float16
107
+ )
108
  pipe_prior.to("cuda")
109
 
110
  prompt = "a hat"
111
+ prior_output = pipe_prior(prompt)
112
 
113
  pipe = KandinskyInpaintPipeline.from_pretrained("kandinsky-community/kandinsky-2-1-inpaint", torch_dtype=torch.float16)
114
  pipe.to("cuda")
 
118
  )
119
 
120
  mask = np.ones((768, 768), dtype=np.float32)
121
+ # Let's mask out an area above the cat's head
122
  mask[:250, 250:-250] = 0
123
 
124
  out = pipe(
125
  prompt,
126
  image=init_image,
127
  mask_image=mask,
128
+ **prior_output,
 
129
  height=768,
130
  width=768,
131
  num_inference_steps=150,
 
145
  import PIL
146
 
147
  import torch
 
148
 
149
+ pipe_prior = KandinskyPriorPipeline.from_pretrained(
150
+ "kandinsky-community/kandinsky-2-1-prior", torch_dtype=torch.float16
151
+ )
152
  pipe_prior.to("cuda")
153
 
154
  img1 = load_image(
 
159
  "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" "/kandinsky/starry_night.jpeg"
160
  )
161
 
162
+ # add all the conditions we want to interpolate; each one can be either a text prompt or an image
163
  images_texts = ["a cat", img1, img2]
164
+
165
+ # specify the weights for each condition in images_texts
166
  weights = [0.3, 0.3, 0.4]
167
+
168
+ # We can leave the prompt empty
169
+ prompt = ""
170
+ prior_out = pipe_prior.interpolate(images_texts, weights)
171
 
172
  pipe = KandinskyPipeline.from_pretrained("kandinsky-community/kandinsky-2-1", torch_dtype=torch.float16)
173
  pipe.to("cuda")
174
 
175
+ image = pipe(prompt, **prior_out, height=768, width=768).images[0]
 
 
176
 
177
  image.save("starry_cat.png")
178
  ```