Text-to-Image
Diffusers
PyTorch
Safetensors
IFPipeline
if

This PR adds the diffusers weights of IF. We tested it using:

# Runs IF stage I and stage II through diffusers and saves each stage's output
# as a PNG so it can be compared with the original IF implementation.
import gc

import torch

from diffusers import DiffusionPipeline
# NOTE(review): the original snippet used AttnAddedKVProcessor and the
# smart*_timesteps schedules without importing them. These are the module
# paths used by diffusers releases that ship IF; confirm against the
# installed version.
from diffusers.models.attention_processor import AttnAddedKVProcessor
from diffusers.pipelines.deepfloyd_if.timesteps import (
    smart50_timesteps,
    smart100_timesteps,
)
from diffusers.training_utils import enable_full_determinism


def save_image_tensor(image, path):
    """Convert the first image of a pipeline ``output_type="pt"`` batch to PIL and save it.

    Args:
        image: tensor returned by the pipeline (presumably NCHW with values
            in [-1, 1] -- TODO confirm against the pipeline output).
        path: destination file path.
    """
    pil_image = (image / 2 + 0.5).clamp(0, 1)
    # Move axis 1 to the end before converting to a NumPy array.
    pil_image = pil_image.cpu().permute(0, 2, 3, 1).float().numpy()
    pil_image = DiffusionPipeline.numpy_to_pil(pil_image)[0]
    pil_image.save(path)


def free_pipeline_memory():
    """Run garbage collection and release cached CUDA memory between stages."""
    gc.collect()
    torch.cuda.empty_cache()


enable_full_determinism(0)

# One generator is shared by both stages, so stage II continues from the
# random state left behind by stage I.
generator = torch.Generator().manual_seed(2)

# ---- Stage I ----
pipe = DiffusionPipeline.from_pretrained("./IF-I-IF-v1.0", torch_dtype=torch.float32)
pipe.unet.set_attn_processor(AttnAddedKVProcessor())
pipe.to('cuda')

prompt = "a photo of a kangaroo wearing an orange hoodie and blue sunglasses standing in front of the eiffel tower holding a sign that says 'very deep learning'"

# The prompt is encoded once; the embeddings are reused by stage II, which is
# loaded without a text encoder.
prompt_embeds, negative_embeds = pipe.encode_prompt(prompt, clean_caption=True)

image = pipe(
    prompt_embeds=prompt_embeds,
    negative_prompt_embeds=negative_embeds,
    timesteps=smart100_timesteps,
    generator=generator,
    output_type="pt",
).images

del pipe
free_pipeline_memory()

# save intermediate image
save_image_tensor(image, "./if_stage_I.png")

# ---- Stage II ----
pipe = DiffusionPipeline.from_pretrained("./IF-II-L-v1.0", torch_dtype=torch.float32, text_encoder=None)
pipe.unet.set_attn_processor(AttnAddedKVProcessor())
pipe.to('cuda')

image = pipe(
    image=image,
    prompt_embeds=prompt_embeds,
    negative_prompt_embeds=negative_embeds,
    timesteps=smart50_timesteps,
    generator=generator,
    output_type="pt",
).images

del pipe
free_pipeline_memory()

save_image_tensor(image, "./if_stage_II.png")

We then compared the results against the original IF implementation:

# Runs stage I and stage II of the original IF code base (checked out in
# ./IF) and saves each stage's output as a PNG for comparison with diffusers.
import sys

import torch

# Make the local IF checkout importable before importing deepfloyd_if.
sys.path.insert(0, 'IF')
from deepfloyd_if.modules import IFStageI, IFStageII, IFStageIII
from deepfloyd_if.modules.t5 import T5Embedder
from deepfloyd_if.pipelines import dream

from diffusers.training_utils import enable_full_determinism

enable_full_determinism(0)

device = 'cuda:0'


def t5():
    """Build the T5 text embedder in float32 on ``device``."""
    return T5Embedder(device=device, dir_or_name="../IF-I-IF-v1.0/text_encoder", torch_dtype=torch.float32)


def if_I():
    """Build the stage I model with 32-bit precision on ``device``."""
    return IFStageI(
        '../if/IF-I-IF-v0.2rc/',
        device=device,
        model_kwargs={"precision": "32"},
    )


def if_II():
    """Build the stage II model with 32-bit precision on ``device``."""
    return IFStageII(
        '../if/IF-II-L-v0.2rc/',
        device=device,
        model_kwargs={"precision": "32"},
    )


prompt = "a photo of a kangaroo wearing an orange hoodie and blue sunglasses standing in front of the eiffel tower holding a sign that says 'very deep learning'"

# NOTE(review): the factory functions are passed uncalled; presumably the
# locally modified dream() invokes them itself -- confirm against the patched
# IF checkout.
result = dream(
    t5=t5,
    if_I=if_I,
    if_II=if_II,
    prompt=[prompt],
    if_I_kwargs={
        "guidance_scale": 7.0,
        "sample_timestep_respacing": "smart100",
    },
    if_II_kwargs={
        "guidance_scale": 4.0,
        "sample_timestep_respacing": "smart50",
    },
    seed=None,  # not used with our code changes
    disable_watermark=True,
)

image = result['I'][0]
image.save("./if_stage_I.png")

image = result['II'][0]
image.save("./if_stage_II.png")

Original picture
if_stage_III_orig.png

Diffusers picture
if_stage_III_diffusers.png

Gugutse changed pull request status to merged

Sign up or log in to comment