---
license: apache-2.0
tags:
- text-to-image
- kandinsky
inference: false
---

# Kandinsky 3.0 IP Adapter

## Usage

```bash
pip install git+https://github.com/ai-forever/kandinsky3-diffusers.git
```

### Image variations

```python
from diffusers.models.attention_processor import Kandi3AttnProcessorIpAdapter, Kandi3AttnProcessor
from diffusers.pipelines.kandinsky3.kandinsky3_pipeline_ip_adapter import KandinskyV3PipelineIpAdapter
from PIL import Image
import torch

pipe = KandinskyV3PipelineIpAdapter.from_pretrained('ai-forever/kandinsky3_ip_adapter', torch_dtype=torch.float16, low_cpu_mem_usage=False, device_map=None)
pipe = pipe.to('cuda')

img = Image.open('path_to_img.jpg')

out_img = pipe(
    '4k caption', img=[img], weights=[1], negative_prompt='',
    height=1024, width=1024, guidance_scale=7.5,
    num_inference_steps=50, cut_context=1
)[0][0]
```

### Image + Image mixing

```python
from diffusers.models.attention_processor import Kandi3AttnProcessorIpAdapter, Kandi3AttnProcessor
from diffusers.pipelines.kandinsky3.kandinsky3_pipeline_ip_adapter import KandinskyV3PipelineIpAdapter
from PIL import Image
import torch

pipe = KandinskyV3PipelineIpAdapter.from_pretrained('ai-forever/kandinsky3_ip_adapter', torch_dtype=torch.float16, low_cpu_mem_usage=False, device_map=None)
pipe = pipe.to('cuda')

img1 = Image.open('path_to_img1.jpg')
img2 = Image.open('path_to_img2.jpg')

out_img = pipe(
    '4k photo', img=[img1, img2], weights=[0.5, 0.5], negative_prompt='',
    height=1024, width=1024, guidance_scale=7.5,
    num_inference_steps=50, cut_context=1
)[0][0]
```

### Text + Image mixing

```python
from diffusers.models.attention_processor import Kandi3AttnProcessorIpAdapter, Kandi3AttnProcessor
from diffusers.pipelines.kandinsky3.kandinsky3_pipeline_ip_adapter import KandinskyV3PipelineIpAdapter
from PIL import Image
import torch

pipe = KandinskyV3PipelineIpAdapter.from_pretrained('ai-forever/kandinsky3_ip_adapter', torch_dtype=torch.float16, low_cpu_mem_usage=False, device_map=None)
pipe = pipe.to('cuda')

img = Image.open('path_to_img.jpg')
caption = 'cat, 4k photo'

out_img = pipe(
    caption, img=[img], weights=[1], negative_prompt='',
    height=1024, width=1024, guidance_scale=7.5,
    num_inference_steps=50, cut_context=1, img_weight=0.5
)[0][0]
```