extensions / sd-webui-controlnet /scripts /processor.py

Upload folder using huggingface_hub

0163a2c verified almost 2 years ago

37.1 kB

	import os
	import cv2
	import numpy as np
	import torch
	import math
	from dataclasses import dataclass
	from transformers.models.clip.modeling_clip import CLIPVisionModelOutput

	from annotator.util import HWC3
	from typing import Callable, Tuple, Union

	from modules.safe import Extra
	from modules import devices
	from scripts.logging import logger


	def torch_handler(module: str, name: str):
	""" Allow all torch access. Bypass A1111 safety whitelist. """
	if module == 'torch':
	return getattr(torch, name)
	if module == 'torch._tensor':
	# depth_anything dep.
	return getattr(torch._tensor, name)


	def pad64(x):
	return int(np.ceil(float(x) / 64.0) * 64 - x)


	def safer_memory(x):
	# Fix many MAC/AMD problems
	return np.ascontiguousarray(x.copy()).copy()


	def resize_image_with_pad(input_image, resolution, skip_hwc3=False):
	if skip_hwc3:
	img = input_image
	else:
	img = HWC3(input_image)
	H_raw, W_raw, _ = img.shape
	k = float(resolution) / float(min(H_raw, W_raw))
	interpolation = cv2.INTER_CUBIC if k > 1 else cv2.INTER_AREA
	H_target = int(np.round(float(H_raw) * k))
	W_target = int(np.round(float(W_raw) * k))
	img = cv2.resize(img, (W_target, H_target), interpolation=interpolation)
	H_pad, W_pad = pad64(H_target), pad64(W_target)
	img_padded = np.pad(img, [[0, H_pad], [0, W_pad], [0, 0]], mode='edge')

	def remove_pad(x):
	return safer_memory(x[:H_target, :W_target])

	return safer_memory(img_padded), remove_pad


	model_canny = None


	def canny(img, res=512, thr_a=100, thr_b=200, **kwargs):
	l, h = thr_a, thr_b
	img, remove_pad = resize_image_with_pad(img, res)
	global model_canny
	if model_canny is None:
	from annotator.canny import apply_canny
	model_canny = apply_canny
	result = model_canny(img, l, h)
	return remove_pad(result), True


	def scribble_thr(img, res=512, **kwargs):
	img, remove_pad = resize_image_with_pad(img, res)
	result = np.zeros_like(img, dtype=np.uint8)
	result[np.min(img, axis=2) < 127] = 255
	return remove_pad(result), True


	def scribble_xdog(img, res=512, thr_a=32, **kwargs):
	img, remove_pad = resize_image_with_pad(img, res)
	g1 = cv2.GaussianBlur(img.astype(np.float32), (0, 0), 0.5)
	g2 = cv2.GaussianBlur(img.astype(np.float32), (0, 0), 5.0)
	dog = (255 - np.min(g2 - g1, axis=2)).clip(0, 255).astype(np.uint8)
	result = np.zeros_like(img, dtype=np.uint8)
	result[2 * (255 - dog) > thr_a] = 255
	return remove_pad(result), True


	def tile_resample(img, res=512, thr_a=1.0, **kwargs):
	img = HWC3(img)
	if thr_a < 1.1:
	return img, True
	H, W, C = img.shape
	H = int(float(H) / float(thr_a))
	W = int(float(W) / float(thr_a))
	img = cv2.resize(img, (W, H), interpolation=cv2.INTER_AREA)
	return img, True


	def threshold(img, res=512, thr_a=127, **kwargs):
	img, remove_pad = resize_image_with_pad(img, res)
	result = np.zeros_like(img, dtype=np.uint8)
	result[np.min(img, axis=2) > thr_a] = 255
	return remove_pad(result), True


	def identity(img, **kwargs):
	return img, True


	def invert(img, res=512, **kwargs):
	return 255 - HWC3(img), True


	model_hed = None


	def hed(img, res=512, **kwargs):
	img, remove_pad = resize_image_with_pad(img, res)
	global model_hed
	if model_hed is None:
	from annotator.hed import apply_hed
	model_hed = apply_hed
	result = model_hed(img)
	return remove_pad(result), True


	def hed_safe(img, res=512, **kwargs):
	img, remove_pad = resize_image_with_pad(img, res)
	global model_hed
	if model_hed is None:
	from annotator.hed import apply_hed
	model_hed = apply_hed
	result = model_hed(img, is_safe=True)
	return remove_pad(result), True


	def unload_hed():
	global model_hed
	if model_hed is not None:
	from annotator.hed import unload_hed_model
	unload_hed_model()


	def scribble_hed(img, res=512, **kwargs):
	result, _ = hed(img, res)
	import cv2
	from annotator.util import nms
	result = nms(result, 127, 3.0)
	result = cv2.GaussianBlur(result, (0, 0), 3.0)
	result[result > 4] = 255
	result[result < 255] = 0
	return result, True


	model_mediapipe_face = None


	def mediapipe_face(img, res=512, thr_a: int = 10, thr_b: float = 0.5, **kwargs):
	max_faces = int(thr_a)
	min_confidence = thr_b
	img, remove_pad = resize_image_with_pad(img, res)
	global model_mediapipe_face
	if model_mediapipe_face is None:
	from annotator.mediapipe_face import apply_mediapipe_face
	model_mediapipe_face = apply_mediapipe_face
	result = model_mediapipe_face(img, max_faces=max_faces, min_confidence=min_confidence)
	return remove_pad(result), True


	model_mlsd = None


	def mlsd(img, res=512, thr_a=0.1, thr_b=0.1, **kwargs):
	thr_v, thr_d = thr_a, thr_b
	img, remove_pad = resize_image_with_pad(img, res)
	global model_mlsd
	if model_mlsd is None:
	from annotator.mlsd import apply_mlsd
	model_mlsd = apply_mlsd
	result = model_mlsd(img, thr_v, thr_d)
	return remove_pad(result), True


	def unload_mlsd():
	global model_mlsd
	if model_mlsd is not None:
	from annotator.mlsd import unload_mlsd_model
	unload_mlsd_model()


	model_depth_anything = None


	def depth_anything(img, res:int = 512, colored:bool = True, **kwargs):
	img, remove_pad = resize_image_with_pad(img, res)
	global model_depth_anything
	if model_depth_anything is None:
	with Extra(torch_handler):
	from annotator.depth_anything import DepthAnythingDetector
	device = devices.get_device_for("controlnet")
	model_depth_anything = DepthAnythingDetector(device)
	return remove_pad(model_depth_anything(img, colored=colored)), True


	def unload_depth_anything():
	if model_depth_anything is not None:
	model_depth_anything.unload_model()


	model_midas = None


	def midas(img, res=512, a=np.pi * 2.0, **kwargs):
	img, remove_pad = resize_image_with_pad(img, res)
	global model_midas
	if model_midas is None:
	from annotator.midas import apply_midas
	model_midas = apply_midas
	result, _ = model_midas(img, a)
	return remove_pad(result), True


	def midas_normal(img, res=512, a=np.pi * 2.0, thr_a=0.4, **kwargs): # bg_th -> thr_a
	bg_th = thr_a
	img, remove_pad = resize_image_with_pad(img, res)
	global model_midas
	if model_midas is None:
	from annotator.midas import apply_midas
	model_midas = apply_midas
	_, result = model_midas(img, a, bg_th)
	return remove_pad(result), True


	def unload_midas():
	global model_midas
	if model_midas is not None:
	from annotator.midas import unload_midas_model
	unload_midas_model()


	model_leres = None


	def leres(img, res=512, a=np.pi * 2.0, thr_a=0, thr_b=0, boost=False, **kwargs):
	img, remove_pad = resize_image_with_pad(img, res)
	global model_leres
	if model_leres is None:
	from annotator.leres import apply_leres
	model_leres = apply_leres
	result = model_leres(img, thr_a, thr_b, boost=boost)
	return remove_pad(result), True


	def unload_leres():
	global model_leres
	if model_leres is not None:
	from annotator.leres import unload_leres_model
	unload_leres_model()


	class OpenposeModel(object):
	def __init__(self) -> None:
	self.model_openpose = None

	def run_model(
	self,
	img: np.ndarray,
	include_body: bool,
	include_hand: bool,
	include_face: bool,
	use_dw_pose: bool = False,
	use_animal_pose: bool = False,
	json_pose_callback: Callable[[str], None] = None,
	res: int = 512,
	**kwargs # Ignore rest of kwargs
	) -> Tuple[np.ndarray, bool]:
	"""Run the openpose model. Returns a tuple of
	- result image
	- is_image flag

	The JSON format pose string is passed to `json_pose_callback`.
	"""
	if json_pose_callback is None:
	json_pose_callback = lambda x: None

	img, remove_pad = resize_image_with_pad(img, res)

	if self.model_openpose is None:
	from annotator.openpose import OpenposeDetector
	self.model_openpose = OpenposeDetector()

	return remove_pad(self.model_openpose(
	img,
	include_body=include_body,
	include_hand=include_hand,
	include_face=include_face,
	use_dw_pose=use_dw_pose,
	use_animal_pose=use_animal_pose,
	json_pose_callback=json_pose_callback
	)), True

	def unload(self):
	if self.model_openpose is not None:
	self.model_openpose.unload_model()


	g_openpose_model = OpenposeModel()

	model_uniformer = None


	def uniformer(img, res=512, **kwargs):
	img, remove_pad = resize_image_with_pad(img, res)
	global model_uniformer
	if model_uniformer is None:
	from annotator.uniformer import apply_uniformer
	model_uniformer = apply_uniformer
	result = model_uniformer(img)
	return remove_pad(result), True


	def unload_uniformer():
	global model_uniformer
	if model_uniformer is not None:
	from annotator.uniformer import unload_uniformer_model
	unload_uniformer_model()


	model_pidinet = None


	def pidinet(img, res=512, **kwargs):
	img, remove_pad = resize_image_with_pad(img, res)
	global model_pidinet
	if model_pidinet is None:
	from annotator.pidinet import apply_pidinet
	model_pidinet = apply_pidinet
	result = model_pidinet(img)
	return remove_pad(result), True


	def pidinet_ts(img, res=512, **kwargs):
	img, remove_pad = resize_image_with_pad(img, res)
	global model_pidinet
	if model_pidinet is None:
	from annotator.pidinet import apply_pidinet
	model_pidinet = apply_pidinet
	result = model_pidinet(img, apply_fliter=True)
	return remove_pad(result), True


	def pidinet_safe(img, res=512, **kwargs):
	img, remove_pad = resize_image_with_pad(img, res)
	global model_pidinet
	if model_pidinet is None:
	from annotator.pidinet import apply_pidinet
	model_pidinet = apply_pidinet
	result = model_pidinet(img, is_safe=True)
	return remove_pad(result), True


	def scribble_pidinet(img, res=512, **kwargs):
	result, _ = pidinet(img, res)
	import cv2
	from annotator.util import nms
	result = nms(result, 127, 3.0)
	result = cv2.GaussianBlur(result, (0, 0), 3.0)
	result[result > 4] = 255
	result[result < 255] = 0
	return result, True


	def unload_pidinet():
	global model_pidinet
	if model_pidinet is not None:
	from annotator.pidinet import unload_pid_model
	unload_pid_model()


	clip_encoder = {
	'clip_g': None,
	'clip_h': None,
	'clip_vitl': None,
	}


	def clip(img, res=512, config='clip_vitl', low_vram=False, **kwargs):
	img = HWC3(img)
	global clip_encoder
	if clip_encoder[config] is None:
	from annotator.clipvision import ClipVisionDetector
	if low_vram:
	logger.info("Loading CLIP model on CPU.")
	clip_encoder[config] = ClipVisionDetector(config, low_vram)
	result = clip_encoder[config](img)
	return result, False


	def unload_clip(config='clip_vitl'):
	global clip_encoder
	if clip_encoder[config] is not None:
	clip_encoder[config].unload_model()
	clip_encoder[config] = None


	model_color = None


	def color(img, res=512, **kwargs):
	img = HWC3(img)
	global model_color
	if model_color is None:
	from annotator.color import apply_color
	model_color = apply_color
	result = model_color(img, res=res)
	return result, True


	def lineart_standard(img, res=512, **kwargs):
	img, remove_pad = resize_image_with_pad(img, res)
	x = img.astype(np.float32)
	g = cv2.GaussianBlur(x, (0, 0), 6.0)
	intensity = np.min(g - x, axis=2).clip(0, 255)
	intensity /= max(16, np.median(intensity[intensity > 8]))
	intensity *= 127
	result = intensity.clip(0, 255).astype(np.uint8)
	return remove_pad(result), True


	model_lineart = None


	def lineart(img, res=512, **kwargs):
	img, remove_pad = resize_image_with_pad(img, res)
	global model_lineart
	if model_lineart is None:
	from annotator.lineart import LineartDetector
	model_lineart = LineartDetector(LineartDetector.model_default)

	# applied auto inversion
	result = 255 - model_lineart(img)
	return remove_pad(result), True


	def unload_lineart():
	global model_lineart
	if model_lineart is not None:
	model_lineart.unload_model()


	model_lineart_coarse = None


	def lineart_coarse(img, res=512, **kwargs):
	img, remove_pad = resize_image_with_pad(img, res)
	global model_lineart_coarse
	if model_lineart_coarse is None:
	from annotator.lineart import LineartDetector
	model_lineart_coarse = LineartDetector(LineartDetector.model_coarse)

	# applied auto inversion
	result = 255 - model_lineart_coarse(img)
	return remove_pad(result), True


	def unload_lineart_coarse():
	global model_lineart_coarse
	if model_lineart_coarse is not None:
	model_lineart_coarse.unload_model()


	model_lineart_anime = None


	def lineart_anime(img, res=512, **kwargs):
	img, remove_pad = resize_image_with_pad(img, res)
	global model_lineart_anime
	if model_lineart_anime is None:
	from annotator.lineart_anime import LineartAnimeDetector
	model_lineart_anime = LineartAnimeDetector()

	# applied auto inversion
	result = 255 - model_lineart_anime(img)
	return remove_pad(result), True


	def unload_lineart_anime():
	global model_lineart_anime
	if model_lineart_anime is not None:
	model_lineart_anime.unload_model()


	model_manga_line = None


	def lineart_anime_denoise(img, res=512, **kwargs):
	img, remove_pad = resize_image_with_pad(img, res)
	global model_manga_line
	if model_manga_line is None:
	from annotator.manga_line import MangaLineExtration
	model_manga_line = MangaLineExtration()

	# applied auto inversion
	result = model_manga_line(img)
	return remove_pad(result), True


	def unload_lineart_anime_denoise():
	global model_manga_line
	if model_manga_line is not None:
	model_manga_line.unload_model()


	model_lama = None


	def lama_inpaint(img, res=512, **kwargs):
	H, W, C = img.shape
	raw_color = img[:, :, 0:3].copy()
	raw_mask = img[:, :, 3:4].copy()

	res = 256 # Always use 256 since lama is trained on 256

	img_res, remove_pad = resize_image_with_pad(img, res, skip_hwc3=True)

	global model_lama
	if model_lama is None:
	from annotator.lama import LamaInpainting
	model_lama = LamaInpainting()

	# applied auto inversion
	prd_color = model_lama(img_res)
	prd_color = remove_pad(prd_color)
	prd_color = cv2.resize(prd_color, (W, H))

	alpha = raw_mask.astype(np.float32) / 255.0
	fin_color = prd_color.astype(np.float32) * alpha + raw_color.astype(np.float32) * (1 - alpha)
	fin_color = fin_color.clip(0, 255).astype(np.uint8)

	result = np.concatenate([fin_color, raw_mask], axis=2)

	return result, True


	def unload_lama_inpaint():
	global model_lama
	if model_lama is not None:
	model_lama.unload_model()


	model_zoe_depth = None


	def zoe_depth(img, res=512, **kwargs):
	img, remove_pad = resize_image_with_pad(img, res)
	global model_zoe_depth
	if model_zoe_depth is None:
	from annotator.zoe import ZoeDetector
	model_zoe_depth = ZoeDetector()
	result = model_zoe_depth(img)
	return remove_pad(result), True


	def unload_zoe_depth():
	global model_zoe_depth
	if model_zoe_depth is not None:
	model_zoe_depth.unload_model()


	model_normal_bae = None


	def normal_bae(img, res=512, **kwargs):
	img, remove_pad = resize_image_with_pad(img, res)
	global model_normal_bae
	if model_normal_bae is None:
	from annotator.normalbae import NormalBaeDetector
	model_normal_bae = NormalBaeDetector()
	result = model_normal_bae(img)
	return remove_pad(result), True


	def unload_normal_bae():
	global model_normal_bae
	if model_normal_bae is not None:
	model_normal_bae.unload_model()


	model_oneformer_coco = None


	def oneformer_coco(img, res=512, **kwargs):
	img, remove_pad = resize_image_with_pad(img, res)
	global model_oneformer_coco
	if model_oneformer_coco is None:
	from annotator.oneformer import OneformerDetector
	model_oneformer_coco = OneformerDetector(OneformerDetector.configs["coco"])
	result = model_oneformer_coco(img)
	return remove_pad(result), True


	def unload_oneformer_coco():
	global model_oneformer_coco
	if model_oneformer_coco is not None:
	model_oneformer_coco.unload_model()


	model_oneformer_ade20k = None


	def oneformer_ade20k(img, res=512, **kwargs):
	img, remove_pad = resize_image_with_pad(img, res)
	global model_oneformer_ade20k
	if model_oneformer_ade20k is None:
	from annotator.oneformer import OneformerDetector
	model_oneformer_ade20k = OneformerDetector(OneformerDetector.configs["ade20k"])
	result = model_oneformer_ade20k(img)
	return remove_pad(result), True


	def unload_oneformer_ade20k():
	global model_oneformer_ade20k
	if model_oneformer_ade20k is not None:
	model_oneformer_ade20k.unload_model()


	model_shuffle = None


	def shuffle(img, res=512, **kwargs):
	img, remove_pad = resize_image_with_pad(img, res)
	img = remove_pad(img)
	global model_shuffle
	if model_shuffle is None:
	from annotator.shuffle import ContentShuffleDetector
	model_shuffle = ContentShuffleDetector()
	result = model_shuffle(img)
	return result, True


	def recolor_luminance(img, res=512, thr_a=1.0, **kwargs):
	result = cv2.cvtColor(HWC3(img), cv2.COLOR_BGR2LAB)
	result = result[:, :, 0].astype(np.float32) / 255.0
	result = result ** thr_a
	result = (result * 255.0).clip(0, 255).astype(np.uint8)
	result = cv2.cvtColor(result, cv2.COLOR_GRAY2RGB)
	return result, True


	def recolor_intensity(img, res=512, thr_a=1.0, **kwargs):
	result = cv2.cvtColor(HWC3(img), cv2.COLOR_BGR2HSV)
	result = result[:, :, 2].astype(np.float32) / 255.0
	result = result ** thr_a
	result = (result * 255.0).clip(0, 255).astype(np.uint8)
	result = cv2.cvtColor(result, cv2.COLOR_GRAY2RGB)
	return result, True


	def blur_gaussian(img, res=512, thr_a=1.0, **kwargs):
	img, remove_pad = resize_image_with_pad(img, res)
	img = remove_pad(img)
	result = cv2.GaussianBlur(img, (0, 0), float(thr_a))
	return result, True


	model_anime_face_segment = None


	def anime_face_segment(img, res=512, **kwargs):
	img, remove_pad = resize_image_with_pad(img, res)
	global model_anime_face_segment
	if model_anime_face_segment is None:
	from annotator.anime_face_segment import AnimeFaceSegment
	model_anime_face_segment = AnimeFaceSegment()

	result = model_anime_face_segment(img)
	return remove_pad(result), True


	def unload_anime_face_segment():
	global model_anime_face_segment
	if model_anime_face_segment is not None:
	model_anime_face_segment.unload_model()



	def densepose(img, res=512, cmap="viridis", **kwargs):
	img, remove_pad = resize_image_with_pad(img, res)
	from annotator.densepose import apply_densepose
	result = apply_densepose(img, cmap=cmap)
	return remove_pad(result), True


	def unload_densepose():
	from annotator.densepose import unload_model
	unload_model()

	model_te_hed = None

	def te_hed(img, res=512, thr_a=2, **kwargs):
	img, remove_pad = resize_image_with_pad(img, res)
	global model_te_hed
	if model_te_hed is None:
	from annotator.teed import TEEDDector
	model_te_hed = TEEDDector()
	result = model_te_hed(img, safe_steps=int(thr_a))
	return remove_pad(result), True

	def unload_te_hed():
	if model_te_hed is not None:
	model_te_hed.unload_model()

	class InsightFaceModel:
	def __init__(self, face_analysis_model_name: str = "buffalo_l"):
	self.model = None
	self.face_analysis_model_name = face_analysis_model_name
	self.antelopev2_installed = False

	def install_antelopev2(self):
	"""insightface's github release on antelopev2 model is down. Downloading
	from huggingface mirror."""
	from basicsr.utils.download_util import load_file_from_url
	from annotator.annotator_path import models_path
	model_root = os.path.join(models_path, "insightface", "models", "antelopev2")
	if not model_root:
	os.makedirs(model_root, exist_ok=True)
	for local_file, url in (
	("1k3d68.onnx", "https://huggingface.co/DIAMONIK7777/antelopev2/resolve/main/1k3d68.onnx"),
	("2d106det.onnx", "https://huggingface.co/DIAMONIK7777/antelopev2/resolve/main/2d106det.onnx"),
	("genderage.onnx", "https://huggingface.co/DIAMONIK7777/antelopev2/resolve/main/genderage.onnx"),
	("glintr100.onnx", "https://huggingface.co/DIAMONIK7777/antelopev2/resolve/main/glintr100.onnx"),
	("scrfd_10g_bnkps.onnx", "https://huggingface.co/DIAMONIK7777/antelopev2/resolve/main/scrfd_10g_bnkps.onnx"),
	):
	local_path = os.path.join(model_root, local_file)
	if not os.path.exists(local_path):
	load_file_from_url(url, model_root)
	self.antelopev2_installed = True

	def load_model(self):
	if self.model is None:
	from insightface.app import FaceAnalysis
	from annotator.annotator_path import models_path
	self.model = FaceAnalysis(
	name=self.face_analysis_model_name,
	providers=['CUDAExecutionProvider', 'CPUExecutionProvider'],
	root=os.path.join(models_path, "insightface"),
	)
	self.model.prepare(ctx_id=0, det_size=(640, 640))

	def run_model(self, img: np.ndarray, **kwargs) -> Tuple[torch.Tensor, bool]:
	self.load_model()
	img = HWC3(img)
	faces = self.model.get(img)
	if not faces:
	raise Exception(f"Insightface: No face found in image.")
	if len(faces) > 1:
	logger.warn("Insightface: More than one face is detected in the image. "
	f"Only the first one will be used.")
	return torch.from_numpy(faces[0].normed_embedding).unsqueeze(0), False

	def run_model_instant_id(
	self,
	img: np.ndarray,
	res: int = 512,
	return_keypoints: bool = False,
	**kwargs
	) -> Tuple[Union[np.ndarray, torch.Tensor], bool]:
	"""Run the insightface model for instant_id.
	Arguments:
	- img: Input image in any size.
	- res: Resolution used to resize image.
	- return_keypoints: Whether to return keypoints image or face embedding.
	"""
	def draw_kps(img: np.ndarray, kps, color_list=[(255,0,0), (0,255,0), (0,0,255), (255,255,0), (255,0,255)]):
	stickwidth = 4
	limbSeq = np.array([[0, 2], [1, 2], [3, 2], [4, 2]])
	kps = np.array(kps)

	h, w, _ = img.shape
	out_img = np.zeros([h, w, 3])

	for i in range(len(limbSeq)):
	index = limbSeq[i]
	color = color_list[index[0]]

	x = kps[index][:, 0]
	y = kps[index][:, 1]
	length = ((x[0] - x[1]) 2 + (y[0] - y[1]) 2) ** 0.5
	angle = math.degrees(math.atan2(y[0] - y[1], x[0] - x[1]))
	polygon = cv2.ellipse2Poly((int(np.mean(x)), int(np.mean(y))), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
	out_img = cv2.fillConvexPoly(out_img.copy(), polygon, color)
	out_img = (out_img * 0.6).astype(np.uint8)

	for idx_kp, kp in enumerate(kps):
	color = color_list[idx_kp]
	x, y = kp
	out_img = cv2.circle(out_img.copy(), (int(x), int(y)), 10, color, -1)

	return out_img.astype(np.uint8)

	if not self.antelopev2_installed:
	self.install_antelopev2()
	self.load_model()

	img, remove_pad = resize_image_with_pad(img, res)
	face_info = self.model.get(img)
	if not face_info:
	raise Exception(f"Insightface: No face found in image.")
	if len(face_info) > 1:
	logger.warn("Insightface: More than one face is detected in the image. "
	f"Only the biggest one will be used.")
	# only use the maximum face
	face_info = sorted(face_info, key=lambda x:(x['bbox'][2]-x['bbox'][0])*x['bbox'][3]-x['bbox'][1])[-1]
	if return_keypoints:
	return remove_pad(draw_kps(img, face_info['kps'])), True
	else:
	return torch.from_numpy(face_info['embedding']), False


	g_insight_face_model = InsightFaceModel()
	g_insight_face_instant_id_model = InsightFaceModel(face_analysis_model_name="antelopev2")


	@dataclass
	class FaceIdPlusInput:
	face_embed: torch.Tensor
	clip_embed: CLIPVisionModelOutput


	def face_id_plus(img, low_vram=False, **kwargs):
	""" FaceID plus uses both face_embeding from insightface and clip_embeding from clip. """
	face_embed, _ = g_insight_face_model.run_model(img)
	clip_embed, _ = clip(img, config='clip_h', low_vram=low_vram)
	return FaceIdPlusInput(face_embed, clip_embed), False


	class HandRefinerModel:
	def __init__(self):
	self.model = None
	self.device = devices.get_device_for("controlnet")

	def load_model(self):
	if self.model is None:
	from annotator.annotator_path import models_path
	from hand_refiner import MeshGraphormerDetector # installed via hand_refiner_portable
	with Extra(torch_handler):
	self.model = MeshGraphormerDetector.from_pretrained(
	"hr16/ControlNet-HandRefiner-pruned",
	cache_dir=os.path.join(models_path, "hand_refiner"),
	device=self.device,
	)
	else:
	self.model.to(self.device)

	def unload(self):
	if self.model is not None:
	self.model.to("cpu")

	def run_model(self, img, res=512, **kwargs):
	img, remove_pad = resize_image_with_pad(img, res)
	self.load_model()
	with Extra(torch_handler):
	depth_map, mask, info = self.model(
	img, output_type="np",
	detect_resolution=res,
	mask_bbox_padding=30,
	)
	return remove_pad(depth_map), True


	g_hand_refiner_model = HandRefinerModel()


	model_free_preprocessors = [
	"reference_only",
	"reference_adain",
	"reference_adain+attn",
	"revision_clipvision",
	"revision_ignore_prompt"
	]

	no_control_mode_preprocessors = [
	"revision_clipvision",
	"revision_ignore_prompt",
	"clip_vision",
	"ip-adapter_clip_sd15",
	"ip-adapter_clip_sdxl",
	"ip-adapter_clip_sdxl_plus_vith",
	"t2ia_style_clipvision",
	"ip-adapter_face_id",
	"ip-adapter_face_id_plus",
	]

	flag_preprocessor_resolution = "Preprocessor Resolution"
	preprocessor_sliders_config = {
	"none": [],
	"inpaint": [],
	"inpaint_only": [],
	"revision_clipvision": [
	None,
	{
	"name": "Noise Augmentation",
	"value": 0.0,
	"min": 0.0,
	"max": 1.0
	},
	],
	"revision_ignore_prompt": [
	None,
	{
	"name": "Noise Augmentation",
	"value": 0.0,
	"min": 0.0,
	"max": 1.0
	},
	],
	"canny": [
	{
	"name": flag_preprocessor_resolution,
	"value": 512,
	"min": 64,
	"max": 2048
	},
	{
	"name": "Canny Low Threshold",
	"value": 100,
	"min": 1,
	"max": 255
	},
	{
	"name": "Canny High Threshold",
	"value": 200,
	"min": 1,
	"max": 255
	},
	],
	"mlsd": [
	{
	"name": flag_preprocessor_resolution,
	"min": 64,
	"max": 2048,
	"value": 512
	},
	{
	"name": "MLSD Value Threshold",
	"min": 0.01,
	"max": 2.0,
	"value": 0.1,
	"step": 0.01
	},
	{
	"name": "MLSD Distance Threshold",
	"min": 0.01,
	"max": 20.0,
	"value": 0.1,
	"step": 0.01
	}
	],
	"hed": [
	{
	"name": flag_preprocessor_resolution,
	"min": 64,
	"max": 2048,
	"value": 512
	}
	],
	"scribble_hed": [
	{
	"name": flag_preprocessor_resolution,
	"min": 64,
	"max": 2048,
	"value": 512
	}
	],
	"hed_safe": [
	{
	"name": flag_preprocessor_resolution,
	"min": 64,
	"max": 2048,
	"value": 512
	}
	],
	"openpose": [
	{
	"name": flag_preprocessor_resolution,
	"min": 64,
	"max": 2048,
	"value": 512
	}
	],
	"openpose_full": [
	{
	"name": flag_preprocessor_resolution,
	"min": 64,
	"max": 2048,
	"value": 512
	}
	],
	"dw_openpose_full": [
	{
	"name": flag_preprocessor_resolution,
	"min": 64,
	"max": 2048,
	"value": 512
	}
	],
	"animal_openpose": [
	{
	"name": flag_preprocessor_resolution,
	"min": 64,
	"max": 2048,
	"value": 512
	}
	],
	"segmentation": [
	{
	"name": flag_preprocessor_resolution,
	"min": 64,
	"max": 2048,
	"value": 512
	}
	],
	"depth": [
	{
	"name": flag_preprocessor_resolution,
	"min": 64,
	"max": 2048,
	"value": 512
	}
	],
	"depth_leres": [
	{
	"name": flag_preprocessor_resolution,
	"min": 64,
	"max": 2048,
	"value": 512
	},
	{
	"name": "Remove Near %",
	"min": 0,
	"max": 100,
	"value": 0,
	"step": 0.1,
	},
	{
	"name": "Remove Background %",
	"min": 0,
	"max": 100,
	"value": 0,
	"step": 0.1,
	}
	],
	"depth_leres++": [
	{
	"name": flag_preprocessor_resolution,
	"min": 64,
	"max": 2048,
	"value": 512
	},
	{
	"name": "Remove Near %",
	"min": 0,
	"max": 100,
	"value": 0,
	"step": 0.1,
	},
	{
	"name": "Remove Background %",
	"min": 0,
	"max": 100,
	"value": 0,
	"step": 0.1,
	}
	],
	"normal_map": [
	{
	"name": flag_preprocessor_resolution,
	"min": 64,
	"max": 2048,
	"value": 512
	},
	{
	"name": "Normal Background Threshold",
	"min": 0.0,
	"max": 1.0,
	"value": 0.4,
	"step": 0.01
	}
	],
	"threshold": [
	{
	"name": flag_preprocessor_resolution,
	"value": 512,
	"min": 64,
	"max": 2048
	},
	{
	"name": "Binarization Threshold",
	"min": 0,
	"max": 255,
	"value": 127
	}
	],

	"scribble_xdog": [
	{
	"name": flag_preprocessor_resolution,
	"value": 512,
	"min": 64,
	"max": 2048
	},
	{
	"name": "XDoG Threshold",
	"min": 1,
	"max": 64,
	"value": 32,
	}
	],
	"blur_gaussian": [
	{
	"name": flag_preprocessor_resolution,
	"value": 512,
	"min": 64,
	"max": 2048
	},
	{
	"name": "Sigma",
	"min": 0.01,
	"max": 64.0,
	"value": 9.0,
	}
	],
	"tile_resample": [
	None,
	{
	"name": "Down Sampling Rate",
	"value": 1.0,
	"min": 1.0,
	"max": 8.0,
	"step": 0.01
	}
	],
	"tile_colorfix": [
	None,
	{
	"name": "Variation",
	"value": 8.0,
	"min": 3.0,
	"max": 32.0,
	"step": 1.0
	}
	],
	"tile_colorfix+sharp": [
	None,
	{
	"name": "Variation",
	"value": 8.0,
	"min": 3.0,
	"max": 32.0,
	"step": 1.0
	},
	{
	"name": "Sharpness",
	"value": 1.0,
	"min": 0.0,
	"max": 2.0,
	"step": 0.01
	}
	],
	"reference_only": [
	None,
	{
	"name": r'Style Fidelity (only for "Balanced" mode)',
	"value": 0.5,
	"min": 0.0,
	"max": 1.0,
	"step": 0.01
	}
	],
	"reference_adain": [
	None,
	{
	"name": r'Style Fidelity (only for "Balanced" mode)',
	"value": 0.5,
	"min": 0.0,
	"max": 1.0,
	"step": 0.01
	}
	],
	"reference_adain+attn": [
	None,
	{
	"name": r'Style Fidelity (only for "Balanced" mode)',
	"value": 0.5,
	"min": 0.0,
	"max": 1.0,
	"step": 0.01
	}
	],
	"inpaint_only+lama": [],
	"color": [
	{
	"name": flag_preprocessor_resolution,
	"value": 512,
	"min": 64,
	"max": 2048,
	}
	],
	"mediapipe_face": [
	{
	"name": flag_preprocessor_resolution,
	"value": 512,
	"min": 64,
	"max": 2048,
	},
	{
	"name": "Max Faces",
	"value": 1,
	"min": 1,
	"max": 10,
	"step": 1
	},
	{
	"name": "Min Face Confidence",
	"value": 0.5,
	"min": 0.01,
	"max": 1.0,
	"step": 0.01
	}
	],
	"recolor_luminance": [
	None,
	{
	"name": "Gamma Correction",
	"value": 1.0,
	"min": 0.1,
	"max": 2.0,
	"step": 0.001
	}
	],
	"recolor_intensity": [
	None,
	{
	"name": "Gamma Correction",
	"value": 1.0,
	"min": 0.1,
	"max": 2.0,
	"step": 0.001
	}
	],
	"anime_face_segment": [
	{
	"name": flag_preprocessor_resolution,
	"value": 512,
	"min": 64,
	"max": 2048
	}
	],
	"densepose": [
	{
	"name": flag_preprocessor_resolution,
	"min": 64,
	"max": 2048,
	"value": 512
	}
	],
	"densepose_parula": [
	{
	"name": flag_preprocessor_resolution,
	"min": 64,
	"max": 2048,
	"value": 512
	}
	],
	"depth_hand_refiner": [
	{
	"name": flag_preprocessor_resolution,
	"value": 512,
	"min": 64,
	"max": 2048
	}
	],
	"te_hed": [
	{
	"name": flag_preprocessor_resolution,
	"value": 512,
	"min": 64,
	"max": 2048
	},
	{
	"name": "Safe Steps",
	"min": 0,
	"max": 10,
	"value": 2,
	"step": 1,
	},
	],
	}

	preprocessor_filters = {
	"All": "none",
	"Canny": "canny",
	"Depth": "depth_midas",
	"NormalMap": "normal_bae",
	"OpenPose": "openpose_full",
	"MLSD": "mlsd",
	"Lineart": "lineart_standard (from white bg & black line)",
	"SoftEdge": "softedge_pidinet",
	"Scribble/Sketch": "scribble_pidinet",
	"Segmentation": "seg_ofade20k",
	"Shuffle": "shuffle",
	"Tile/Blur": "tile_resample",
	"Inpaint": "inpaint_only",
	"InstructP2P": "none",
	"Reference": "reference_only",
	"Recolor": "recolor_luminance",
	"Revision": "revision_clipvision",
	"T2I-Adapter": "none",
	"IP-Adapter": "ip-adapter_clip_sd15",
	"Instant_ID": "instant_id",
	}

	preprocessor_filters_aliases = {
	'instructp2p': ['ip2p'],
	'segmentation': ['seg'],
	'normalmap': ['normal'],
	't2i-adapter': ['t2i_adapter', 't2iadapter', 't2ia'],
	'ip-adapter': ['ip_adapter', 'ipadapter'],
	'scribble/sketch': ['scribble', 'sketch'],
	'tile/blur': ['tile', 'blur'],
	'openpose':['openpose', 'densepose'],
	} # must use all lower texts