from typing import Callable, Optional

import PIL.Image
import torch
from controlnet_aux import (
    CannyDetector,
    LineartDetector,
    MidasDetector,
    PidiNetDetector,
    ZoeDetector,
)
from diffusers import (
    AutoencoderKL,
    EulerAncestralDiscreteScheduler,
    StableDiffusionXLAdapterPipeline,
    T2IAdapter,
)

# Adapter checkpoints accepted by `get_preprocessor` and `Model`.
ADAPTER_NAMES = [
    "TencentARC/t2i-adapter-canny-sdxl-1.0",
    "TencentARC/t2i-adapter-sketch-sdxl-1.0",
    "TencentARC/t2i-adapter-lineart-sdxl-1.0",
    "TencentARC/t2i-adapter-depth-midas-sdxl-1.0",
    "TencentARC/t2i-adapter-depth-zoe-sdxl-1.0",
    "TencentARC/t2i-adapter-recolor-sdxl-1.0",
]


def _default_device() -> torch.device:
    """Return the CUDA device when one is available, otherwise the CPU device."""
    return torch.device("cuda" if torch.cuda.is_available() else "cpu")


class CannyPreprocessor:
    """Callable wrapper around ``controlnet_aux.CannyDetector``."""

    def __init__(self):
        self.model = CannyDetector()

    def __call__(self, image: PIL.Image.Image) -> PIL.Image.Image:
        """Run the detector on *image* and return its output."""
        return self.model(image, detect_resolution=384, image_resolution=1024)


class LineartPreprocessor:
    """Callable wrapper around ``controlnet_aux.LineartDetector``."""

    def __init__(self):
        self.model = LineartDetector.from_pretrained("lllyasviel/Annotators").to(_default_device())

    def __call__(self, image: PIL.Image.Image) -> PIL.Image.Image:
        """Run the detector on *image* and return its output."""
        return self.model(image, detect_resolution=384, image_resolution=1024)


class MidasPreprocessor:
    """Callable wrapper around ``controlnet_aux.MidasDetector``."""

    def __init__(self):
        self.model = MidasDetector.from_pretrained(
            "valhalla/t2iadapter-aux-models",
            filename="dpt_large_384.pt",
            model_type="dpt_large",
        ).to(_default_device())

    def __call__(self, image: PIL.Image.Image) -> PIL.Image.Image:
        """Run the detector on *image* and return its output."""
        return self.model(image, detect_resolution=512, image_resolution=1024)


class PidiNetPreprocessor:
    """Callable wrapper around ``controlnet_aux.PidiNetDetector``."""

    def __init__(self):
        self.model = PidiNetDetector.from_pretrained("lllyasviel/Annotators").to(_default_device())

    def __call__(self, image: PIL.Image.Image) -> PIL.Image.Image:
        """Run the detector on *image* (with ``apply_filter=True``) and return its output."""
        return self.model(image, detect_resolution=512, image_resolution=1024, apply_filter=True)


class RecolorPreprocessor:
    """Callable that strips colour from an image while keeping three channels."""

    def __call__(self, image: PIL.Image.Image) -> PIL.Image.Image:
        """Return *image* converted to mode ``"L"`` and then back to ``"RGB"``."""
        return image.convert("L").convert("RGB")


class ZoePreprocessor:
    """Callable wrapper around ``controlnet_aux.ZoeDetector``."""

    def __init__(self):
        self.model = ZoeDetector.from_pretrained(
            "valhalla/t2iadapter-aux-models",
            filename="zoed_nk.pth",
            model_type="zoedepth_nk",
        ).to(_default_device())

    def __call__(self, image: PIL.Image.Image) -> PIL.Image.Image:
        """Run the detector on *image* (with ``gamma_corrected=True``) and return its output."""
        return self.model(image, gamma_corrected=True)


# Adapter checkpoint name -> preprocessor class producing its conditioning image.
_PREPROCESSOR_CLASSES = {
    "TencentARC/t2i-adapter-canny-sdxl-1.0": CannyPreprocessor,
    "TencentARC/t2i-adapter-sketch-sdxl-1.0": PidiNetPreprocessor,
    "TencentARC/t2i-adapter-lineart-sdxl-1.0": LineartPreprocessor,
    "TencentARC/t2i-adapter-depth-midas-sdxl-1.0": MidasPreprocessor,
    "TencentARC/t2i-adapter-depth-zoe-sdxl-1.0": ZoePreprocessor,
    "TencentARC/t2i-adapter-recolor-sdxl-1.0": RecolorPreprocessor,
}


def get_preprocessor(adapter_name: str) -> Callable[[PIL.Image.Image], PIL.Image.Image]:
    """Instantiate the preprocessor that matches *adapter_name*.

    Args:
        adapter_name: One of the entries of ``ADAPTER_NAMES``.

    Returns:
        A freshly constructed preprocessor; calling it with a PIL image
        returns the conditioning image for that adapter.

    Raises:
        ValueError: If *adapter_name* is not a known adapter.
    """
    try:
        preprocessor_cls = _PREPROCESSOR_CLASSES[adapter_name]
    except (KeyError, TypeError):
        # TypeError covers unhashable arguments so callers always see ValueError.
        raise ValueError(f"Adapter name must be one of {ADAPTER_NAMES}") from None
    return preprocessor_cls()


def _load_adapter(adapter_name: str, device: torch.device) -> T2IAdapter:
    """Load the T2I adapter *adapter_name* in float16 and move it to *device*."""
    try:
        adapter = T2IAdapter.from_pretrained(
            adapter_name,
            torch_dtype=torch.float16,
            variant="fp16",
        )
    except OSError:
        # NOTE(review): fallback for repositories that may not publish an fp16
        # weight variant; load the default weights and cast them instead.
        adapter = T2IAdapter.from_pretrained(adapter_name, torch_dtype=torch.float16)
    return adapter.to(device)


class Model:
    """SDXL T2I-Adapter pipeline bundled with the matching image preprocessor.

    The pipeline is only built when CUDA is available. On a CPU-only host
    ``pipe`` and ``preprocessor`` are ``None`` and both ``change_adapter`` and
    ``run`` raise ``RuntimeError``.
    """

    MAX_NUM_INFERENCE_STEPS = 50

    def __init__(self, adapter_name: str):
        """Load the preprocessor, adapter and SDXL pipeline for *adapter_name*.

        Args:
            adapter_name: One of the entries of ``ADAPTER_NAMES``.

        Raises:
            ValueError: If *adapter_name* is not a known adapter.
        """
        if adapter_name not in ADAPTER_NAMES:
            raise ValueError(f"Adapter name must be one of {ADAPTER_NAMES}")

        self.adapter_name = adapter_name
        self.device = _default_device()
        # Both stay None on CPU-only hosts so later calls can fail cleanly.
        self.preprocessor: Optional[Callable[[PIL.Image.Image], PIL.Image.Image]] = None
        self.pipe: Optional[StableDiffusionXLAdapterPipeline] = None

        if not torch.cuda.is_available():
            return

        self.preprocessor = get_preprocessor(adapter_name)

        model_id = "stabilityai/stable-diffusion-xl-base-1.0"
        adapter = _load_adapter(adapter_name, self.device)
        euler_a = EulerAncestralDiscreteScheduler.from_pretrained(model_id, subfolder="scheduler")
        vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
        self.pipe = StableDiffusionXLAdapterPipeline.from_pretrained(
            model_id,
            vae=vae,
            adapter=adapter,
            scheduler=euler_a,
            torch_dtype=torch.float16,
            variant="fp16",
        ).to(self.device)
        self.pipe.enable_xformers_memory_efficient_attention()

    def change_adapter(self, adapter_name: str) -> None:
        """Swap the loaded adapter and preprocessor for *adapter_name*.

        Does nothing when *adapter_name* is already the active adapter.

        Raises:
            RuntimeError: If CUDA is not available.
            ValueError: If *adapter_name* is not a known adapter.
        """
        if not torch.cuda.is_available():
            raise RuntimeError("This demo does not work on CPU.")
        if adapter_name not in ADAPTER_NAMES:
            raise ValueError(f"Adapter name must be one of {ADAPTER_NAMES}")
        if adapter_name == self.adapter_name:
            return

        # Drop each old model and clear the CUDA cache before loading its
        # replacement, so old and new are not referenced at the same time.
        self.preprocessor = None
        torch.cuda.empty_cache()
        self.preprocessor = get_preprocessor(adapter_name)

        self.pipe.adapter = None
        torch.cuda.empty_cache()
        self.pipe.adapter = _load_adapter(adapter_name, self.device)

        # Record the switch only after everything loaded; without this the
        # early-return check above compares against a stale name.
        self.adapter_name = adapter_name

    def resize_image(self, image: PIL.Image.Image) -> PIL.Image.Image:
        """Return *image* rescaled so that its longer side is 1024 pixels.

        The aspect ratio is kept (up to integer truncation) and each side is
        clamped to at least one pixel so extreme aspect ratios cannot produce
        an empty image.
        """
        w, h = image.size
        scale = 1024 / max(w, h)
        new_w = max(1, int(w * scale))
        new_h = max(1, int(h * scale))
        return image.resize((new_w, new_h), PIL.Image.LANCZOS)

    def run(
        self,
        image: PIL.Image.Image,
        prompt: str,
        negative_prompt: str,
        num_inference_steps: int = 30,
        guidance_scale: float = 7.5,
        adapter_conditioning_scale: float = 0.8,
        cond_tau: float = 0.8,
        seed: int = 0,
        apply_preprocess: bool = True,
    ) -> list[PIL.Image.Image]:
        """Generate one image conditioned on *image* and the prompts.

        Args:
            image: Input image; it is always resized with ``resize_image``.
            prompt: Passed to the pipeline as ``prompt``.
            negative_prompt: Passed to the pipeline as ``negative_prompt``.
            num_inference_steps: Step count, at most ``MAX_NUM_INFERENCE_STEPS``.
            guidance_scale: Passed to the pipeline unchanged.
            adapter_conditioning_scale: Passed to the pipeline unchanged.
            cond_tau: Passed to the pipeline unchanged.
            seed: Seed for the ``torch.Generator`` handed to the pipeline.
            apply_preprocess: When true, run the adapter's preprocessor on the
                resized image; when false, use the resized image as-is.

        Returns:
            ``[conditioning_image, generated_image]``.

        Raises:
            ValueError: If *num_inference_steps* exceeds the maximum.
            RuntimeError: If the pipeline was not loaded (no CUDA device).
        """
        if num_inference_steps > self.MAX_NUM_INFERENCE_STEPS:
            raise ValueError(f"Number of steps must not exceed {self.MAX_NUM_INFERENCE_STEPS}")
        if self.pipe is None:
            raise RuntimeError("This demo does not work on CPU.")

        # Resize image to avoid OOM
        image = self.resize_image(image)

        if apply_preprocess:
            image = self.preprocessor(image)

        generator = torch.Generator(device=self.device).manual_seed(seed)
        out = self.pipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            image=image,
            num_inference_steps=num_inference_steps,
            adapter_conditioning_scale=adapter_conditioning_scale,
            cond_tau=cond_tau,
            generator=generator,
            guidance_scale=guidance_scale,
        ).images[0]
        return [image, out]