xinsir committed
Commit
46a61d5
1 Parent(s): 09d876b

Update README.md

Files changed (1):
  1. README.md +260 -1
README.md CHANGED
@@ -38,4 +38,263 @@ supports any aspect ratio and any upscale factor; the following are 3 * 3 examples

![images_7](./000053.webp)

![images_8](./000053_scribble.webp)

# Code to Use Tile blur

Code references:
https://huggingface.co/TTPlanet/TTPLanet_SDXL_Controlnet_Tile_Realistic/blob/main/TTP_tile_preprocessor_v5.py
https://github.com/lllyasviel/ControlNet-v1-1-nightly/blob/main/gradio_tile.py

```python
from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL
from diffusers import DDIMScheduler, EulerAncestralDiscreteScheduler
from PIL import Image
from guided_filter import FastGuidedFilter  # I have uploaded this file in this repo
import torch
import random
import numpy as np
import cv2

# optional random-crop helper (defined for completeness; not called below)
def resize_image_control(control_image, resolution):
    HH, WW, _ = control_image.shape
    crop_h = random.randint(0, HH - resolution[1])
    crop_w = random.randint(0, WW - resolution[0])
    crop_image = control_image[crop_h:crop_h + resolution[1], crop_w:crop_w + resolution[0], :]
    return crop_image, crop_w, crop_h

def apply_gaussian_blur(image_np, ksize=5, sigmaX=1.0):
    if ksize % 2 == 0:
        ksize += 1  # ksize must be odd
    blurred_image = cv2.GaussianBlur(image_np, (ksize, ksize), sigmaX=sigmaX)
    return blurred_image

def apply_guided_filter(image_np, radius, eps, scale):
    gf = FastGuidedFilter(image_np, radius, eps, scale)
    return gf.filter(image_np)


controlnet_conditioning_scale = 1.0
prompt = "your prompt, the longer the better, you can describe it in as much detail as possible"
negative_prompt = 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'

eulera_scheduler = EulerAncestralDiscreteScheduler.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler")

controlnet = ControlNetModel.from_pretrained(
    "xinsir/controlnet-tile-sdxl-1.0",
    torch_dtype=torch.float16
)

# when testing with another base model, you need to change the VAE as well.
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)

pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
    safety_checker=None,
    torch_dtype=torch.float16,
    scheduler=eulera_scheduler,
)

controlnet_img = cv2.imread("your original image path")
height, width, _ = controlnet_img.shape
ratio = np.sqrt(1024. * 1024. / (width * height))
W, H = int(width * ratio), int(height * ratio)

crop_w, crop_h = 0, 0
controlnet_img = cv2.resize(controlnet_img, (W, H))

# sample random degradation strengths
blur_strength = random.sample([i / 10. for i in range(10, 201, 2)], k=1)[0]
radius = random.sample([i for i in range(1, 40, 2)], k=1)[0]
eps = random.sample([i / 1000. for i in range(1, 101, 2)], k=1)[0]
scale_factor = random.sample([i / 10. for i in range(10, 181, 5)], k=1)[0]

if random.random() > 0.5:
    controlnet_img = apply_gaussian_blur(controlnet_img, ksize=int(blur_strength), sigmaX=blur_strength / 2)

if random.random() > 0.5:
    # apply guided filter
    controlnet_img = apply_guided_filter(controlnet_img, radius, eps, scale_factor)

# downscale and re-upscale to simulate a low-resolution source
controlnet_img = cv2.resize(controlnet_img, (int(W / scale_factor), int(H / scale_factor)), interpolation=cv2.INTER_AREA)
controlnet_img = cv2.resize(controlnet_img, (W, H), interpolation=cv2.INTER_CUBIC)

controlnet_img = cv2.cvtColor(controlnet_img, cv2.COLOR_BGR2RGB)
controlnet_img = Image.fromarray(controlnet_img)

# need to resize the image resolution to 1024 * 1024 or the same bucket resolution to get the best performance
new_width, new_height = W, H

images = pipe(
    prompt,
    negative_prompt=negative_prompt,
    image=controlnet_img,
    controlnet_conditioning_scale=controlnet_conditioning_scale,
    width=new_width,
    height=new_height,
    num_inference_steps=30,
).images

# PNG usually gives better image quality than JPG or WebP, but the files are much bigger
images[0].save("your image save path")
```
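
The comment in the code above mentions resizing to 1024 * 1024 or a matching bucket resolution. As a minimal sketch of my own (`nearest_bucket` is not part of this repo), you can snap an arbitrary size to a resolution with roughly 1024 * 1024 area whose sides are multiples of 64:

```python
import numpy as np

def nearest_bucket(width, height, area=1024 * 1024, multiple=64):
    # scale so the area is about `area`, then round each side to a multiple of `multiple`
    ratio = np.sqrt(area / (width * height))
    W = max(multiple, int(round(width * ratio / multiple)) * multiple)
    H = max(multiple, int(round(height * ratio / multiple)) * multiple)
    return W, H

print(nearest_bucket(1920, 1080))  # (1344, 768)
```
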
# Code to Use Tile var

Using a more detailed prompt to regenerate can help!

```python
from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL
from diffusers import DDIMScheduler, EulerAncestralDiscreteScheduler
from PIL import Image
import torch
import numpy as np
import cv2

controlnet_conditioning_scale = 1.0
prompt = "your prompt, the longer the better, you can describe it in as much detail as possible"
negative_prompt = 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'

eulera_scheduler = EulerAncestralDiscreteScheduler.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler")

controlnet = ControlNetModel.from_pretrained(
    "xinsir/controlnet-tile-sdxl-1.0",
    torch_dtype=torch.float16
)

# when testing with another base model, you need to change the VAE as well.
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)

pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
    safety_checker=None,
    torch_dtype=torch.float16,
    scheduler=eulera_scheduler,
)

controlnet_img = cv2.imread("your original image path")
height, width, _ = controlnet_img.shape
ratio = np.sqrt(1024. * 1024. / (width * height))
W, H = int(width * ratio), int(height * ratio)

crop_w, crop_h = 0, 0
controlnet_img = cv2.resize(controlnet_img, (W, H))
controlnet_img = cv2.cvtColor(controlnet_img, cv2.COLOR_BGR2RGB)
controlnet_img = Image.fromarray(controlnet_img)

# need to resize the image resolution to 1024 * 1024 or the same bucket resolution to get the best performance
new_width, new_height = W, H

images = pipe(
    prompt,
    negative_prompt=negative_prompt,
    image=controlnet_img,
    controlnet_conditioning_scale=controlnet_conditioning_scale,
    width=new_width,
    height=new_height,
    num_inference_steps=30,
).images

# PNG usually gives better image quality than JPG or WebP, but the files are much bigger
images[0].save("your image save path")
```
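
Each run draws a fresh variation. To make a particular variation reproducible, you can pass an explicit generator to the pipeline (a small sketch of mine, assuming a CUDA device; the Tile super example below uses the same `torch.Generator` pattern):

```python
import random
import torch

seed = random.randint(0, 2147483647)  # record this seed to reproduce the result later
generator = torch.Generator('cuda').manual_seed(seed)

images = pipe(
    prompt,
    negative_prompt=negative_prompt,
    image=controlnet_img,
    controlnet_conditioning_scale=controlnet_conditioning_scale,
    generator=generator,  # same seed + same inputs -> same variation
    width=new_width,
    height=new_height,
    num_inference_steps=30,
).images
```
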

# Code to Use Tile super

Performance may be unstable, and the next version is being optimized!

```python
from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL
from diffusers import DDIMScheduler, EulerAncestralDiscreteScheduler
from PIL import Image
import torch
import random
import numpy as np
import cv2

controlnet_conditioning_scale = 1.0
prompt = "your prompt, the longer the better, you can describe it in as much detail as possible"
negative_prompt = 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'

eulera_scheduler = EulerAncestralDiscreteScheduler.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler")

controlnet = ControlNetModel.from_pretrained(
    "xinsir/controlnet-tile-sdxl-1.0",
    torch_dtype=torch.float16
)

# when testing with another base model, you need to change the VAE as well.
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)

pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
    safety_checker=None,
    torch_dtype=torch.float16,
    scheduler=eulera_scheduler,
)

controlnet_img = cv2.imread("your original image path")
height, width, _ = controlnet_img.shape
ratio = np.sqrt(1024. * 1024. / (width * height))
# round the sides down to multiples of 48 so each third is still a multiple of 16
W, H = int(width * ratio) // 48 * 48, int(height * ratio) // 48 * 48
controlnet_img = cv2.resize(controlnet_img, (W, H))
controlnet_img = cv2.cvtColor(controlnet_img, cv2.COLOR_BGR2RGB)
controlnet_img = Image.fromarray(controlnet_img)

# need to resize the image resolution to 1024 * 1024 or the same bucket resolution to get the best performance
target_width = W // 3
target_height = H // 3

images = []
for i in range(3):  # 3 rows
    for j in range(3):  # 3 columns
        left = j * target_width
        top = i * target_height
        right = left + target_width
        bottom = top + target_height

        # crop the tile from the computed bounds and upscale it to the full working resolution
        cropped_image = controlnet_img.crop((left, top, right, bottom))
        cropped_image = cropped_image.resize((W, H))
        images.append(cropped_image)

seed = random.randint(0, 2147483647)
generator = torch.Generator('cuda').manual_seed(seed)

result_images = []
for sub_img in images:
    new_width, new_height = W, H
    out = pipe(prompt=prompt,
               image=sub_img,  # each tile is passed as the control image
               negative_prompt=negative_prompt,
               generator=generator,
               controlnet_conditioning_scale=controlnet_conditioning_scale,
               width=new_width,
               height=new_height,
               num_inference_steps=30,
               crops_coords_top_left=(W, H),
               target_size=(W, H),
               original_size=(W * 2, H * 2),
               )
    result_images.append(out.images[0])

# paste the nine upscaled tiles onto one new image
new_im = Image.new('RGB', (new_width * 3, new_height * 3))
new_im.paste(result_images[0], (0, 0))
new_im.paste(result_images[1], (new_width, 0))
new_im.paste(result_images[2], (new_width * 2, 0))
new_im.paste(result_images[3], (0, new_height))
new_im.paste(result_images[4], (new_width, new_height))
new_im.paste(result_images[5], (new_width * 2, new_height))
new_im.paste(result_images[6], (0, new_height * 2))
new_im.paste(result_images[7], (new_width, new_height * 2))
new_im.paste(result_images[8], (new_width * 2, new_height * 2))

# PNG usually gives better image quality than JPG or WebP, but the files are much bigger
new_im.save("your image save path")
```
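
The 3 * 3 loop above generalizes to other grid sizes, in line with the note that any aspect ratio and any upscale factor are supported. As a sketch of my own (`tile_upscale` is not part of this repo, and like the 3 * 3 version it does not blend tile seams), the tiling and pasting can be wrapped for an arbitrary n * n grid:

```python
from PIL import Image

def tile_upscale(pipe, src, prompt, negative_prompt, n=3, steps=30):
    # src: PIL image whose sides are divisible by n; each tile is rendered
    # at the full working resolution and pasted back into an n*n canvas.
    W, H = src.size
    tw, th = W // n, H // n
    canvas = Image.new('RGB', (W * n, H * n))
    for i in range(n):
        for j in range(n):
            tile = src.crop((j * tw, i * th, (j + 1) * tw, (i + 1) * th)).resize((W, H))
            out = pipe(prompt=prompt,
                       negative_prompt=negative_prompt,
                       image=tile,
                       width=W,
                       height=H,
                       num_inference_steps=steps).images[0]
            canvas.paste(out, (j * W, i * H))
    return canvas
```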