# File size: 3,026 Bytes
#
# 300ee94

# Diffusers' ControlNet Implementation Subjective Evaluation

import torch
import os

from diffusers import DiffusionPipeline, ControlNetModel, DDIMScheduler

from PIL import Image

test_prompt = "best quality, extremely detailed"
test_negative_prompt = "blur, lowres, bad anatomy, worst quality, low quality"

def resize_for_condition_image(input_image: Image.Image, resolution: int) -> Image.Image:
    """Return an RGB copy of ``input_image`` resized for use as a condition image.

    The image is scaled so that its shorter side becomes ``resolution`` pixels,
    then each side is snapped to the nearest multiple of 64 (never below 64).

    Args:
        input_image: Source PIL image; it is converted to RGB before resizing.
        resolution: Target length, in pixels, of the shorter side before the
            snap to a multiple of 64.

    Returns:
        A new RGB PIL image whose width and height are multiples of 64.
    """
    input_image = input_image.convert("RGB")
    width, height = input_image.size
    scale = float(resolution) / min(height, width)

    # Snap to multiples of 64; clamp so a small target can never round down
    # to a zero-sized side, which ``resize`` would reject.
    target_height = max(64, int(round(height * scale / 64.0)) * 64)
    target_width = max(64, int(round(width * scale / 64.0)) * 64)

    # Pillow has no ``Image.AREA`` filter (that name comes from OpenCV's
    # INTER_AREA); ``Image.BOX`` is Pillow's equal-weight averaging filter and
    # is used here when the image is not being enlarged.
    resample = Image.LANCZOS if scale > 1 else Image.BOX
    return input_image.resize((target_width, target_height), resample=resample)

def generate_image(seed, prompt, negative_prompt, control, guess_mode=False):
    """Run the module-level ``pipe`` once and return the first generated image.

    Args:
        seed: Integer seed for the CPU generator that draws the initial noise.
        prompt: Text prompt forwarded to the pipeline.
        negative_prompt: Negative text prompt forwarded to the pipeline.
        control: Image passed to the pipeline both as ``image`` and as
            ``controlnet_conditioning_image``.
        guess_mode: When true, uses guidance scale 4.0 with 50 steps instead
            of guidance scale 9.0 with 20 steps. The flag itself is not
            forwarded to the pipeline.

    Returns:
        The first entry of the pipeline result's ``images``.
    """
    # Draw the noise on the CPU from a seeded generator, then move it to the
    # GPU.
    # NOTE(review): presumably done on CPU so results do not vary by GPU — confirm.
    rng = torch.Generator(device="cpu").manual_seed(seed)
    # NOTE(review): the fixed 64x64 latent presumably assumes a 512x512 output,
    # and it is unclear from this file whether the custom pipeline actually
    # uses ``latents`` -- confirm against the pipeline implementation.
    initial_noise = torch.randn((1, 4, 64, 64), device="cpu", generator=rng).cuda()

    if guess_mode:
        cfg_scale, step_count = 4.0, 50
    else:
        cfg_scale, step_count = 9.0, 20

    result = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        guidance_scale=cfg_scale,
        num_inference_steps=step_count,
        latents=initial_noise,
        image=control,
        controlnet_conditioning_image=control,
        strength=1.0,
        # guess_mode=guess_mode,
    )
    return result.images[0]


if __name__ == "__main__":
    # Script entry point: load the ControlNet and base pipeline, prepare the
    # condition images, then write one output image per (image, seed) pair.
    # Everything is kept at module level because generate_image() reads the
    # global ``pipe`` defined below.
    model_name = "f1e_sd15_tile"
    original_image_folder = "./control_images/"
    control_image_folder = "./control_images/converted/"  # not referenced below
    output_image_folder = "./output_images/diffusers/"
    os.makedirs(output_image_folder, exist_ok=True)

    # Alternative checkpoint source (disabled):
    # model_id = f"lllyasviel/control_v11{model_name}"
    # controlnet = ControlNetModel.from_pretrained(model_id)
    #
    # The subfolder is derived from ``model_name`` so that the ControlNet, the
    # base-model choice and the output filenames cannot drift apart.
    # NOTE(review): assumes every subfolder in the repository is named
    # ``control_v11<model_name>`` -- confirm before switching models.
    controlnet = ControlNetModel.from_pretrained(
        "takuma104/control_v11",
        subfolder=f"control_v11{model_name}",
    )

    if model_name == "p_sd15s2_lineart_anime":
        base_model_id = "Linaqruf/anything-v3.0"
        base_model_revision = None
    else:
        base_model_id = "runwayml/stable-diffusion-v1-5"
        base_model_revision = "non-ema"

    pipe = DiffusionPipeline.from_pretrained(
        base_model_id,
        revision=base_model_revision,
        custom_pipeline="stable_diffusion_controlnet_img2img",
        controlnet=controlnet,
        safety_checker=None,
    ).to("cuda")
    # Replace the pipeline's scheduler with DDIM, reusing its configuration.
    pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)

    original_image_filenames = [
        "dog_64x64.png",
    ]

    # Open each source file in a ``with`` block so its handle is closed as
    # soon as the resized RGB copy has been produced.
    image_conditions = []
    for fn in original_image_filenames:
        with Image.open(f"{original_image_folder}{fn}") as source_image:
            image_conditions.append(
                resize_for_condition_image(source_image, resolution=512)
            )

    for i, control in enumerate(image_conditions):
        for seed in range(4):
            image = generate_image(
                seed=seed,
                prompt=test_prompt,
                negative_prompt=test_negative_prompt,
                control=control,
            )
            image.save(f"{output_image_folder}output_{model_name}_{i}_{seed}.png")