img2img question

#12
by terekita - opened

So excited about this space, thank you so much!!! I'm trying to use this for img2img, and tried the following in colab (error follows the code):

from os import pipe2  # NOTE(review): never used below; looks like an accidental auto-import — confirm and drop
import torch
from diffusers import AutoPipelineForImage2Image
from diffusers.utils import make_image_grid, load_image
# Repro script for the ValueError in the traceback: img2img on the SDXL refiner checkpoint with a LoRA plus two textual-inversion embeddings.
pipe = AutoPipelineForImage2Image.from_pretrained(
    "stabilityai/stable-diffusion-xl-refiner-1.0", torch_dtype=torch.float16, variant="fp16", use_safetensors=True
)  # this is the *refiner* checkpoint; the author's follow-up reports success once the refiner is not used
pipe.enable_model_cpu_offload()

# remove following line if xFormers is not installed or you have PyTorch 2.0 or higher installed
#pipe.enable_xformers_memory_efficient_attention()

pipe.load_lora_weights('terekita/beingstyle', weight_name='pytorch_lora_weights.safetensors')  # the traceback's `---> 14` arrow points at this call
embedding_path = hf_hub_download(repo_id='terekita/beingstyle', filename='beingstyle_emb.safetensors', repo_type="model")  # NOTE(review): hf_hub_download is not imported in this snippet
state_dict = load_file(embedding_path)  # NOTE(review): load_file is not imported in this snippet
pipe.load_textual_inversion(state_dict["clip_l"], token=["<s0>", "<s1>"], text_encoder=pipeline.text_encoder, tokenizer=pipeline.tokenizer)  # NOTE(review): `pipeline` is never defined here; the object created above is `pipe`
pipe.load_textual_inversion(state_dict["clip_g"], token=["<s0>", "<s1>"], text_encoder=pipeline.text_encoder_2, tokenizer=pipeline.tokenizer_2)  # NOTE(review): same undefined `pipeline` name

# prepare image
# (local file path on the notebook machine)
init_image = load_image("/content/terekita_httpss.mj.run1BBwqkVf-Eg_the_garden_of_earthly_delight_3dbe2600-498f-48ea-84d4-ef3afb08f8a7.png")

prompt = "in the style of <s0><s1>"  # <s0>/<s1> are the tokens registered by the load_textual_inversion calls above

# pass prompt and image to pipeline (NOTE(review): calls the undefined `pipeline`, not `pipe`)
image = pipeline(prompt, image=init_image, strength=0.5).images[0]
make_image_grid([init_image, image], rows=1, cols=2)

Error:

ValueError                                Traceback (most recent call last)

<ipython-input-11-c97800383aa6> in <cell line: 14>()
     12 #pipe.enable_xformers_memory_efficient_attention()
     13 
---> 14 pipe.load_lora_weights('terekita/beingstyle', weight_name='pytorch_lora_weights.safetensors')
     15 embedding_path = hf_hub_download(repo_id='terekita/beingstyle', filename='beingstyle_emb.safetensors', repo_type="model")
     16 state_dict = load_file(embedding_path)

4 frames

/usr/local/lib/python3.10/dist-packages/diffusers/models/modeling_utils.py in load_model_dict_into_meta(model, state_dict, device, dtype, model_name_or_path)
    152         if empty_state_dict[param_name].shape != param.shape:
    153             model_name_or_path_str = f"{model_name_or_path} " if model_name_or_path is not None else ""
--> 154             raise ValueError(
    155                 f"Cannot load {model_name_or_path_str}because {param_name} expected shape {empty_state_dict[param_name]}, but got {param.shape}. If you want to instead overwrite randomly initialized weights, please make sure to pass both `low_cpu_mem_usage=False` and `ignore_mismatched_sizes=True`. For more information, see also: https://github.com/huggingface/diffusers/issues/1619#issuecomment-1345604389 as an example."
    156             )

ValueError: Cannot load because down.weight expected shape tensor(..., device='meta', size=(32, 768)), but got torch.Size([32, 640]). If you want to instead overwrite randomly initialized weights, please make sure to pass both `low_cpu_mem_usage=False` and `ignore_mismatched_sizes=True`. For more information, see also: https://github.com/huggingface/diffusers/issues/1619#issuecomment-1345604389 as an example.

Never mind, answering my own question: the code below works (use the SDXL base model instead of the refiner). And again, thank you for this space!

# Working img2img example: SDXL base checkpoint + the 'terekita/beingstyle'
# LoRA and its two textual-inversion embeddings.
import torch
from diffusers import StableDiffusionXLImg2ImgPipeline
from diffusers.utils import load_image, make_image_grid
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file

# Load the SDXL *base* model; the same LoRA failed to load on the refiner
# checkpoint with a weight-shape mismatch.
pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
)
pipe = pipe.to("cuda")

# Attach the LoRA weights, then fetch the embedding file from the same repo.
pipe.load_lora_weights('terekita/beingstyle', weight_name='pytorch_lora_weights.safetensors')
embedding_path = hf_hub_download(repo_id='terekita/beingstyle', filename='beingstyle_emb.safetensors', repo_type="model")
state_dict = load_file(embedding_path)

# Register the <s0>/<s1> tokens with each of the pipeline's two text
# encoders, pairing the "clip_l" entry with text_encoder/tokenizer and the
# "clip_g" entry with text_encoder_2/tokenizer_2.
pipe.load_textual_inversion(state_dict["clip_l"], token=["<s0>", "<s1>"], text_encoder=pipe.text_encoder, tokenizer=pipe.tokenizer)
pipe.load_textual_inversion(state_dict["clip_g"], token=["<s0>", "<s1>"], text_encoder=pipe.text_encoder_2, tokenizer=pipe.tokenizer_2)

# prepare image
init_image = load_image("/content/terekita_httpss.mj.run1BBwqkVf-Eg_the_garden_of_earthly_delight_3dbe2600-498f-48ea-84d4-ef3afb08f8a7.png")

prompt = "in the style of <s0><s1>"

# pass prompt and image to pipeline
image = pipe(prompt, image=init_image, strength=0.5).images[0]

# Side-by-side grid of the input and the result; left as the final
# expression so a notebook cell displays it.
make_image_grid([init_image, image], rows=1, cols=2)
terekita changed discussion status to closed

Sign up or log in to comment