Files changed (1) hide show
  1. README.md +27 -44
README.md CHANGED
@@ -25,41 +25,23 @@ pip install diffusers transformers
25
  ### Text to image
26
 
27
  ```python
28
- from diffusers import KandinskyPipeline, KandinskyPriorPipeline
29
  import torch
30
 
31
-
32
- pipe_prior = KandinskyPriorPipeline.from_pretrained("kandinsky-community/kandinsky-2-1-prior", torch_dtype=torch.float16)
33
  pipe_prior.to("cuda")
34
 
 
 
 
35
  prompt = "A alien cheeseburger creature eating itself, claymation, cinematic, moody lighting"
36
  negative_prompt = "low quality, bad quality"
37
 
38
- image_emb = pipe_prior(
39
- prompt, guidance_scale=1.0, num_inference_steps=25, generator=generator, negative_prompt=negative_prompt
40
- ).images
41
-
42
- zero_image_emb = pipe_prior(
43
- negative_prompt, guidance_scale=1.0, num_inference_steps=25, generator=generator, negative_prompt=negative_prompt
44
- ).images
45
-
46
- pipe = KandinskyPipeline.from_pretrained("kandinsky-community/kandinsky-2-1", torch_dtype=torch.float16)
47
- pipe.to("cuda")
48
-
49
-
50
- images = pipe(
51
- prompt,
52
- image_embeds=image_emb,
53
- negative_image_embeds=zero_image_emb,
54
- num_images_per_prompt=2,
55
- height=768,
56
- width=768,
57
- num_inference_steps=100,
58
- guidance_scale=4.0,
59
- generator=generator,
60
- ).images[0]
61
 
62
- image.save("./cheeseburger_monster.png")
 
63
  ```
64
 
65
  ![img](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/kandinsky-docs/cheeseburger.png)
@@ -81,7 +63,9 @@ original_image = Image.open(BytesIO(response.content)).convert("RGB")
81
  original_image = original_image.resize((768, 512))
82
 
83
  # create prior
84
- pipe_prior = KandinskyPriorPipeline.from_pretrained("kandinsky-community/kandinsky-2-1-prior", torch_dtype=torch.float16)
 
 
85
  pipe_prior.to("cuda")
86
 
87
  # create img2img pipeline
@@ -91,22 +75,16 @@ pipe.to("cuda")
91
  prompt = "A fantasy landscape, Cinematic lighting"
92
  negative_prompt = "low quality, bad quality"
93
 
94
- image_emb = pipe_prior(
95
- prompt, guidance_scale=4.0, num_inference_steps=25, generator=generator, negative_prompt=negative_prompt
96
- ).images
97
-
98
- zero_image_emb = pipe_prior(
99
- negative_prompt, guidance_scale=4.0, num_inference_steps=25, generator=generator, negative_prompt=negative_prompt
100
- ).images
101
 
102
  out = pipe(
103
  prompt,
104
  image=original_image,
105
- image_embeds=image_emb,
106
- negative_image_embeds=zero_image_emb,
107
  height=768,
108
  width=768,
109
- num_inference_steps=500,
110
  strength=0.3,
111
  )
112
 
@@ -124,9 +102,10 @@ from diffusers.utils import load_image
124
  import PIL
125
 
126
  import torch
127
- from torchvision import transforms
128
 
129
- pipe_prior = KandinskyPriorPipeline.from_pretrained("kandinsky-community/kandinsky-2-1-prior", torch_dtype=torch.float16)
 
 
130
  pipe_prior.to("cuda")
131
 
132
  img1 = load_image(
@@ -137,16 +116,20 @@ img2 = load_image(
137
  "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" "/kandinsky/starry_night.jpeg"
138
  )
139
 
 
140
  images_texts = ["a cat", img1, img2]
 
 
141
  weights = [0.3, 0.3, 0.4]
142
- image_emb, zero_image_emb = pipe_prior.interpolate(images_texts, weights)
 
 
 
143
 
144
  pipe = KandinskyPipeline.from_pretrained("kandinsky-community/kandinsky-2-1", torch_dtype=torch.float16)
145
  pipe.to("cuda")
146
 
147
- image = pipe(
148
- "", image_embeds=image_emb, negative_image_embeds=zero_image_emb, height=768, width=768, num_inference_steps=150
149
- ).images[0]
150
 
151
  image.save("starry_cat.png")
152
  ```
 
25
  ### Text to image
26
 
27
  ```python
28
+ from diffusers import DiffusionPipeline
29
  import torch
30
 
31
+ pipe_prior = DiffusionPipeline.from_pretrained("kandinsky-community/kandinsky-2-1-prior", torch_dtype=torch.float16)
 
32
  pipe_prior.to("cuda")
33
 
34
+ t2i_pipe = DiffusionPipeline.from_pretrained("kandinsky-community/kandinsky-2-1", torch_dtype=torch.float16)
35
+ t2i_pipe.to("cuda")
36
+
37
  prompt = "A alien cheeseburger creature eating itself, claymation, cinematic, moody lighting"
38
  negative_prompt = "low quality, bad quality"
39
 
40
+ generator = torch.Generator(device="cuda").manual_seed(12)
41
+ image_embeds, negative_image_embeds = pipe_prior(prompt, negative_prompt, generator=generator).to_tuple()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
+ image = t2i_pipe(prompt, image_embeds=image_embeds, negative_image_embeds=negative_image_embeds).images[0]
44
+ image.save("cheeseburger_monster.png")
45
  ```
46
 
47
  ![img](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/kandinsky-docs/cheeseburger.png)
 
63
  original_image = original_image.resize((768, 512))
64
 
65
  # create prior
66
+ pipe_prior = KandinskyPriorPipeline.from_pretrained(
67
+ "kandinsky-community/kandinsky-2-1-prior", torch_dtype=torch.float16
68
+ )
69
  pipe_prior.to("cuda")
70
 
71
  # create img2img pipeline
 
75
  prompt = "A fantasy landscape, Cinematic lighting"
76
  negative_prompt = "low quality, bad quality"
77
 
78
+ generator = torch.Generator(device="cuda").manual_seed(30)
79
+ image_embeds, negative_image_embeds = pipe_prior(prompt, negative_prompt, generator=generator).to_tuple()
 
 
 
 
 
80
 
81
  out = pipe(
82
  prompt,
83
  image=original_image,
84
+ image_embeds=image_embeds,
85
+ negative_image_embeds=negative_image_embeds,
86
  height=768,
87
  width=768,
 
88
  strength=0.3,
89
  )
90
 
 
102
  import PIL
103
 
104
  import torch
 
105
 
106
+ pipe_prior = KandinskyPriorPipeline.from_pretrained(
107
+ "kandinsky-community/kandinsky-2-1-prior", torch_dtype=torch.float16
108
+ )
109
  pipe_prior.to("cuda")
110
 
111
  img1 = load_image(
 
116
  "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" "/kandinsky/starry_night.jpeg"
117
  )
118
 
119
+ # add all the conditions we want to interpolate; each one can be either a text prompt or an image
120
  images_texts = ["a cat", img1, img2]
121
+
122
+ # specify the weights for each condition in images_texts
123
  weights = [0.3, 0.3, 0.4]
124
+
125
+ # We can leave the prompt empty: the conditions are passed in through the interpolated prior output
126
+ prompt = ""
127
+ prior_out = pipe_prior.interpolate(images_texts, weights)
128
 
129
  pipe = KandinskyPipeline.from_pretrained("kandinsky-community/kandinsky-2-1", torch_dtype=torch.float16)
130
  pipe.to("cuda")
131
 
132
+ image = pipe(prompt, **prior_out, height=768, width=768).images[0]
 
 
133
 
134
  image.save("starry_cat.png")
135
  ```