import os
import cv2
import numpy as np
import torch
import math

from dataclasses import dataclass
from transformers.models.clip.modeling_clip import CLIPVisionModelOutput
from typing import Callable, Tuple, Union

from modules.safe import Extra
from modules import devices
from annotator.util import HWC3
from scripts.logging import logger


def torch_handler(module: str, name: str):
    """Allow all torch access. Bypasses the A1111 safety whitelist."""
    if module == 'torch':
        return getattr(torch, name)
    if module == 'torch._tensor':
        # depth_anything dep.
        return getattr(torch._tensor, name)
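
# Usage note (editor's sketch, mirroring how this handler is used later in this
# file): pass `torch_handler` to `modules.safe.Extra` so that unpickling inside
# the guarded block may resolve torch attributes, e.g.
#   with Extra(torch_handler):
#       from annotator.depth_anything import DepthAnythingDetector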

def pad64(x):
    return int(np.ceil(float(x) / 64.0) * 64 - x)


def safer_memory(x):
    # Fix many MAC/AMD problems.
    return np.ascontiguousarray(x.copy()).copy()


def resize_image_with_pad(input_image, resolution, skip_hwc3=False):
    if skip_hwc3:
        img = input_image
    else:
        img = HWC3(input_image)
    H_raw, W_raw, _ = img.shape
    k = float(resolution) / float(min(H_raw, W_raw))
    interpolation = cv2.INTER_CUBIC if k > 1 else cv2.INTER_AREA
    H_target = int(np.round(float(H_raw) * k))
    W_target = int(np.round(float(W_raw) * k))
    img = cv2.resize(img, (W_target, H_target), interpolation=interpolation)
    H_pad, W_pad = pad64(H_target), pad64(W_target)
    img_padded = np.pad(img, [[0, H_pad], [0, W_pad], [0, 0]], mode='edge')

    def remove_pad(x):
        return safer_memory(x[:H_target, :W_target])

    return safer_memory(img_padded), remove_pad
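
# Usage sketch (editor's note, not in the original source): every detector below
# follows the same pattern. The shorter image side is scaled to `resolution`,
# the result is edge-padded up to multiples of 64, and `remove_pad` crops a
# detector's output back to the pre-pad size:
#   img, remove_pad = resize_image_with_pad(raw_rgb, 512)
#   detected = some_detector(img)
#   output = remove_pad(detected)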

def canny(img, res=512, thr_a=100, thr_b=200, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    result = cv2.Canny(img, thr_a, thr_b)
    return remove_pad(result), True
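
# Example (editor's sketch): `thr_a`/`thr_b` map to cv2.Canny's low/high
# hysteresis thresholds; the second return value flags the result as an image
# rather than an embedding:
#   edges, is_image = canny(img, res=512, thr_a=100, thr_b=200)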

def scribble_xdog(img, res=512, thr_a=32, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    g1 = cv2.GaussianBlur(img.astype(np.float32), (0, 0), 0.5)
    g2 = cv2.GaussianBlur(img.astype(np.float32), (0, 0), 5.0)
    dog = (255 - np.min(g2 - g1, axis=2)).clip(0, 255).astype(np.uint8)
    result = np.zeros_like(img, dtype=np.uint8)
    result[2 * (255 - dog) > thr_a] = 255
    return remove_pad(result), True
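
# Editor's note: this is an XDoG-style sketch extractor. The difference of two
# Gaussians (sigma 0.5 and 5.0) picks out edges; `thr_a` sets how strong a
# difference must be before a pixel is kept as a white scribble line.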

def tile_resample(img, res=512, thr_a=1.0, **kwargs):
    img = HWC3(img)
    if thr_a < 1.1:
        return img, True
    H, W, C = img.shape
    H = int(float(H) / float(thr_a))
    W = int(float(W) / float(thr_a))
    img = cv2.resize(img, (W, H), interpolation=cv2.INTER_AREA)
    return img, True
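
# Editor's note: for tile_resample, `thr_a` is the downsampling factor. Values
# below 1.1 are treated as "no resampling"; e.g. thr_a=2.0 halves each side:
#   small, _ = tile_resample(img, thr_a=2.0)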

def threshold(img, res=512, thr_a=127, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    result = np.zeros_like(img, dtype=np.uint8)
    result[np.min(img, axis=2) > thr_a] = 255
    return remove_pad(result), True


def identity(img, **kwargs):
    return img, True


def invert(img, res=512, **kwargs):
    return 255 - HWC3(img), True


model_hed = None


def hed(img, res=512, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    global model_hed
    if model_hed is None:
        from annotator.hed import apply_hed
        model_hed = apply_hed
    result = model_hed(img)
    return remove_pad(result), True


def hed_safe(img, res=512, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    global model_hed
    if model_hed is None:
        from annotator.hed import apply_hed
        model_hed = apply_hed
    result = model_hed(img, is_safe=True)
    return remove_pad(result), True


def unload_hed():
    global model_hed
    if model_hed is not None:
        from annotator.hed import unload_hed_model
        unload_hed_model()


def scribble_hed(img, res=512, **kwargs):
    result, _ = hed(img, res)
    from annotator.util import nms
    result = nms(result, 127, 3.0)
    result = cv2.GaussianBlur(result, (0, 0), 3.0)
    result[result > 4] = 255
    result[result < 255] = 0
    return result, True
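
# Editor's note: the scribble variants thin the soft HED/PiDiNet edge maps into
# binary scribbles: non-maximum suppression at threshold 127, a Gaussian blur
# to reconnect strokes, then a hard binarization (everything above 4 -> 255,
# the rest -> 0).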

model_mediapipe_face = None


def mediapipe_face(img, res=512, thr_a: int = 10, thr_b: float = 0.5, **kwargs):
    max_faces = int(thr_a)
    min_confidence = thr_b
    img, remove_pad = resize_image_with_pad(img, res)
    global model_mediapipe_face
    if model_mediapipe_face is None:
        from annotator.mediapipe_face import apply_mediapipe_face
        model_mediapipe_face = apply_mediapipe_face
    result = model_mediapipe_face(img, max_faces=max_faces, min_confidence=min_confidence)
    return remove_pad(result), True


model_mlsd = None


def mlsd(img, res=512, thr_a=0.1, thr_b=0.1, **kwargs):
    thr_v, thr_d = thr_a, thr_b
    img, remove_pad = resize_image_with_pad(img, res)
    global model_mlsd
    if model_mlsd is None:
        from annotator.mlsd import apply_mlsd
        model_mlsd = apply_mlsd
    result = model_mlsd(img, thr_v, thr_d)
    return remove_pad(result), True


def unload_mlsd():
    global model_mlsd
    if model_mlsd is not None:
        from annotator.mlsd import unload_mlsd_model
        unload_mlsd_model()

model_depth_anything = None


def depth_anything(img, res: int = 512, colored: bool = True, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    global model_depth_anything
    if model_depth_anything is None:
        with Extra(torch_handler):
            from annotator.depth_anything import DepthAnythingDetector
            device = devices.get_device_for("controlnet")
            model_depth_anything = DepthAnythingDetector(device)
    return remove_pad(model_depth_anything(img, colored=colored)), True


def unload_depth_anything():
    if model_depth_anything is not None:
        model_depth_anything.unload_model()


model_midas = None


def midas(img, res=512, a=np.pi * 2.0, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    global model_midas
    if model_midas is None:
        from annotator.midas import apply_midas
        model_midas = apply_midas
    result, _ = model_midas(img, a)
    return remove_pad(result), True


def midas_normal(img, res=512, a=np.pi * 2.0, thr_a=0.4, **kwargs):  # bg_th -> thr_a
    bg_th = thr_a
    img, remove_pad = resize_image_with_pad(img, res)
    global model_midas
    if model_midas is None:
        from annotator.midas import apply_midas
        model_midas = apply_midas
    _, result = model_midas(img, a, bg_th)
    return remove_pad(result), True


def unload_midas():
    global model_midas
    if model_midas is not None:
        from annotator.midas import unload_midas_model
        unload_midas_model()


model_leres = None


def leres(img, res=512, a=np.pi * 2.0, thr_a=0, thr_b=0, boost=False, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    global model_leres
    if model_leres is None:
        from annotator.leres import apply_leres
        model_leres = apply_leres
    result = model_leres(img, thr_a, thr_b, boost=boost)
    return remove_pad(result), True


def unload_leres():
    global model_leres
    if model_leres is not None:
        from annotator.leres import unload_leres_model
        unload_leres_model()

class OpenposeModel(object):
    def __init__(self) -> None:
        self.model_openpose = None

    def run_model(
        self,
        img: np.ndarray,
        include_body: bool,
        include_hand: bool,
        include_face: bool,
        use_dw_pose: bool = False,
        use_animal_pose: bool = False,
        json_pose_callback: Callable[[str], None] = None,
        res: int = 512,
        **kwargs  # Ignore rest of kwargs
    ) -> Tuple[np.ndarray, bool]:
        """Run the openpose model. Returns a tuple of
        - result image
        - is_image flag

        The JSON format pose string is passed to `json_pose_callback`.
        """
        if json_pose_callback is None:
            json_pose_callback = lambda x: None
        img, remove_pad = resize_image_with_pad(img, res)
        if self.model_openpose is None:
            from annotator.openpose import OpenposeDetector
            self.model_openpose = OpenposeDetector()
        return remove_pad(self.model_openpose(
            img,
            include_body=include_body,
            include_hand=include_hand,
            include_face=include_face,
            use_dw_pose=use_dw_pose,
            use_animal_pose=use_animal_pose,
            json_pose_callback=json_pose_callback,
        )), True

    def unload(self):
        if self.model_openpose is not None:
            self.model_openpose.unload_model()


g_openpose_model = OpenposeModel()
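
# Usage sketch (editor's note): callers go through the shared instance, e.g.
#   pose_img, is_image = g_openpose_model.run_model(
#       img, include_body=True, include_hand=False, include_face=False,
#       json_pose_callback=lambda json_str: print(json_str))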

model_uniformer = None


def uniformer(img, res=512, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    global model_uniformer
    if model_uniformer is None:
        from annotator.uniformer import apply_uniformer
        model_uniformer = apply_uniformer
    result = model_uniformer(img)
    return remove_pad(result), True


def unload_uniformer():
    global model_uniformer
    if model_uniformer is not None:
        from annotator.uniformer import unload_uniformer_model
        unload_uniformer_model()


model_pidinet = None


def pidinet(img, res=512, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    global model_pidinet
    if model_pidinet is None:
        from annotator.pidinet import apply_pidinet
        model_pidinet = apply_pidinet
    result = model_pidinet(img)
    return remove_pad(result), True

def pidinet_ts(img, res=512, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    global model_pidinet
    if model_pidinet is None:
        from annotator.pidinet import apply_pidinet
        model_pidinet = apply_pidinet
    # 'apply_fliter' (sic) is the keyword's actual spelling in annotator.pidinet.
    result = model_pidinet(img, apply_fliter=True)
    return remove_pad(result), True

def pidinet_safe(img, res=512, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    global model_pidinet
    if model_pidinet is None:
        from annotator.pidinet import apply_pidinet
        model_pidinet = apply_pidinet
    result = model_pidinet(img, is_safe=True)
    return remove_pad(result), True


def scribble_pidinet(img, res=512, **kwargs):
    result, _ = pidinet(img, res)
    from annotator.util import nms
    result = nms(result, 127, 3.0)
    result = cv2.GaussianBlur(result, (0, 0), 3.0)
    result[result > 4] = 255
    result[result < 255] = 0
    return result, True


def unload_pidinet():
    global model_pidinet
    if model_pidinet is not None:
        from annotator.pidinet import unload_pid_model
        unload_pid_model()


clip_encoder = {
    'clip_g': None,
    'clip_h': None,
    'clip_vitl': None,
}


def clip(img, res=512, config='clip_vitl', low_vram=False, **kwargs):
    global clip_encoder
    if clip_encoder[config] is None:
        from annotator.clipvision import ClipVisionDetector
        if low_vram:
            logger.info("Loading CLIP model on CPU.")
        clip_encoder[config] = ClipVisionDetector(config, low_vram)
    result = clip_encoder[config](img)
    return result, False


def unload_clip(config='clip_vitl'):
    global clip_encoder
    if clip_encoder[config] is not None:
        clip_encoder[config].unload_model()
        clip_encoder[config] = None
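
# Editor's note: unlike the detectors above, `clip` returns an embedding rather
# than an image, hence the False is_image flag. One encoder is cached per
# config key ('clip_g', 'clip_h', 'clip_vitl'):
#   embed, is_image = clip(img, config='clip_h')   # is_image == False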

model_color = None


def color(img, res=512, **kwargs):
    img = HWC3(img)
    global model_color
    if model_color is None:
        from annotator.color import apply_color
        model_color = apply_color
    result = model_color(img, res=res)
    return result, True

def lineart_standard(img, res=512, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    x = img.astype(np.float32)
    g = cv2.GaussianBlur(x, (0, 0), 6.0)
    intensity = np.min(g - x, axis=2).clip(0, 255)
    intensity /= max(16, np.median(intensity[intensity > 8]))
    intensity *= 127
    result = intensity.clip(0, 255).astype(np.uint8)
    return remove_pad(result), True
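
# Editor's note: lineart_standard is model-free. Dark strokes stand out as
# positive (blurred - original) differences; dividing by the median of the
# clearly-inked pixels (floored at 16 to avoid amplifying noise) normalizes
# stroke strength before scaling toward mid-gray.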

model_lineart = None


def lineart(img, res=512, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    global model_lineart
    if model_lineart is None:
        from annotator.lineart import LineartDetector
        model_lineart = LineartDetector(LineartDetector.model_default)
    # applied auto inversion
    result = 255 - model_lineart(img)
    return remove_pad(result), True


def unload_lineart():
    global model_lineart
    if model_lineart is not None:
        model_lineart.unload_model()


model_lineart_coarse = None


def lineart_coarse(img, res=512, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    global model_lineart_coarse
    if model_lineart_coarse is None:
        from annotator.lineart import LineartDetector
        model_lineart_coarse = LineartDetector(LineartDetector.model_coarse)
    # applied auto inversion
    result = 255 - model_lineart_coarse(img)
    return remove_pad(result), True


def unload_lineart_coarse():
    global model_lineart_coarse
    if model_lineart_coarse is not None:
        model_lineart_coarse.unload_model()


model_lineart_anime = None


def lineart_anime(img, res=512, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    global model_lineart_anime
    if model_lineart_anime is None:
        from annotator.lineart_anime import LineartAnimeDetector
        model_lineart_anime = LineartAnimeDetector()
    # applied auto inversion
    result = 255 - model_lineart_anime(img)
    return remove_pad(result), True


def unload_lineart_anime():
    global model_lineart_anime
    if model_lineart_anime is not None:
        model_lineart_anime.unload_model()


model_manga_line = None


def lineart_anime_denoise(img, res=512, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    global model_manga_line
    if model_manga_line is None:
        from annotator.manga_line import MangaLineExtration
        model_manga_line = MangaLineExtration()
    # inversion is already applied inside the detector
    result = model_manga_line(img)
    return remove_pad(result), True


def unload_lineart_anime_denoise():
    global model_manga_line
    if model_manga_line is not None:
        model_manga_line.unload_model()


model_zoe_depth = None


def zoe_depth(img, res=512, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    global model_zoe_depth
    if model_zoe_depth is None:
        from annotator.zoe import ZoeDetector
        model_zoe_depth = ZoeDetector()
    result = model_zoe_depth(img)
    return remove_pad(result), True


def unload_zoe_depth():
    global model_zoe_depth
    if model_zoe_depth is not None:
        model_zoe_depth.unload_model()


model_normal_bae = None


def normal_bae(img, res=512, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    global model_normal_bae
    if model_normal_bae is None:
        from annotator.normalbae import NormalBaeDetector
        model_normal_bae = NormalBaeDetector()
    result = model_normal_bae(img)
    return remove_pad(result), True


def unload_normal_bae():
    global model_normal_bae
    if model_normal_bae is not None:
        model_normal_bae.unload_model()


model_oneformer_coco = None


def oneformer_coco(img, res=512, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    global model_oneformer_coco
    if model_oneformer_coco is None:
        from annotator.oneformer import OneformerDetector
        model_oneformer_coco = OneformerDetector(OneformerDetector.configs["coco"])
    result = model_oneformer_coco(img)
    return remove_pad(result), True


def unload_oneformer_coco():
    global model_oneformer_coco
    if model_oneformer_coco is not None:
        model_oneformer_coco.unload_model()


model_oneformer_ade20k = None


def oneformer_ade20k(img, res=512, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    global model_oneformer_ade20k
    if model_oneformer_ade20k is None:
        from annotator.oneformer import OneformerDetector
        model_oneformer_ade20k = OneformerDetector(OneformerDetector.configs["ade20k"])
    result = model_oneformer_ade20k(img)
    return remove_pad(result), True


def unload_oneformer_ade20k():
    global model_oneformer_ade20k
    if model_oneformer_ade20k is not None:
        model_oneformer_ade20k.unload_model()

def recolor_luminance(img, res=512, thr_a=1.0, **kwargs):
    result = cv2.cvtColor(HWC3(img), cv2.COLOR_BGR2LAB)
    result = result[:, :, 0].astype(np.float32) / 255.0
    result = result ** thr_a
    result = (result * 255.0).clip(0, 255).astype(np.uint8)
    result = cv2.cvtColor(result, cv2.COLOR_GRAY2RGB)
    return result, True


def recolor_intensity(img, res=512, thr_a=1.0, **kwargs):
    result = cv2.cvtColor(HWC3(img), cv2.COLOR_BGR2HSV)
    result = result[:, :, 2].astype(np.float32) / 255.0
    result = result ** thr_a
    result = (result * 255.0).clip(0, 255).astype(np.uint8)
    result = cv2.cvtColor(result, cv2.COLOR_GRAY2RGB)
    return result, True
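
# Editor's note: for both recolor variants `thr_a` acts as a gamma exponent on
# the normalized channel (L for luminance, V for intensity); thr_a > 1 darkens
# midtones and thr_a < 1 brightens them before the grayscale map is returned.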

def blur_gaussian(img, res=512, thr_a=1.0, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    img = remove_pad(img)
    result = cv2.GaussianBlur(img, (0, 0), float(thr_a))
    return result, True

model_anime_face_segment = None


def anime_face_segment(img, res=512, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    global model_anime_face_segment
    if model_anime_face_segment is None:
        from annotator.anime_face_segment import AnimeFaceSegment
        model_anime_face_segment = AnimeFaceSegment()
    result = model_anime_face_segment(img)
    return remove_pad(result), True


def unload_anime_face_segment():
    global model_anime_face_segment
    if model_anime_face_segment is not None:
        model_anime_face_segment.unload_model()


def densepose(img, res=512, cmap="viridis", **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    from annotator.densepose import apply_densepose
    result = apply_densepose(img, cmap=cmap)
    return remove_pad(result), True


def unload_densepose():
    from annotator.densepose import unload_model
    unload_model()

model_te_hed = None


def te_hed(img, res=512, thr_a=2, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    global model_te_hed
    if model_te_hed is None:
        # 'TEEDDector' (sic) is the class's actual spelling in annotator.teed.
        from annotator.teed import TEEDDector
        model_te_hed = TEEDDector()
    result = model_te_hed(img, safe_steps=int(thr_a))
    return remove_pad(result), True


def unload_te_hed():
    if model_te_hed is not None:
        model_te_hed.unload_model()

class InsightFaceModel:
    def __init__(self, face_analysis_model_name: str = "buffalo_l"):
        self.model = None
        self.face_analysis_model_name = face_analysis_model_name
        self.antelopev2_installed = False

    @staticmethod
    def pick_largest_face(faces):
        if not faces:
            raise Exception("Insightface: No face found in image.")
        if len(faces) > 1:
            logger.warn("Insightface: More than one face is detected in the image. "
                        "Only the biggest one will be used.")
        # Only use the biggest face.
        return sorted(
            faces,
            key=lambda x: (x['bbox'][2] - x['bbox'][0]) * (x['bbox'][3] - x['bbox'][1]),
        )[-1]
    def install_antelopev2(self):
        """insightface's GitHub release of the antelopev2 model is down.
        Download from a huggingface mirror instead."""
        from scripts.utils import load_file_from_url
        from annotator.annotator_path import models_path
        model_root = os.path.join(models_path, "insightface", "models", "antelopev2")
        if not os.path.exists(model_root):
            os.makedirs(model_root, exist_ok=True)
        for local_file, url in (
            ("1k3d68.onnx", "https://huggingface.co/DIAMONIK7777/antelopev2/resolve/main/1k3d68.onnx"),
            ("2d106det.onnx", "https://huggingface.co/DIAMONIK7777/antelopev2/resolve/main/2d106det.onnx"),
            ("genderage.onnx", "https://huggingface.co/DIAMONIK7777/antelopev2/resolve/main/genderage.onnx"),
            ("glintr100.onnx", "https://huggingface.co/DIAMONIK7777/antelopev2/resolve/main/glintr100.onnx"),
            ("scrfd_10g_bnkps.onnx", "https://huggingface.co/DIAMONIK7777/antelopev2/resolve/main/scrfd_10g_bnkps.onnx"),
        ):
            local_path = os.path.join(model_root, local_file)
            if not os.path.exists(local_path):
                load_file_from_url(url, model_dir=model_root)
        self.antelopev2_installed = True
    def load_model(self):
        if self.model is None:
            from insightface.app import FaceAnalysis
            from annotator.annotator_path import models_path
            self.model = FaceAnalysis(
                name=self.face_analysis_model_name,
                providers=['CUDAExecutionProvider', 'CPUExecutionProvider'],
                root=os.path.join(models_path, "insightface"),
            )
            self.model.prepare(ctx_id=0, det_size=(640, 640))

    def run_model(self, img: np.ndarray, **kwargs) -> Tuple[torch.Tensor, bool]:
        self.load_model()
        img = img[:, :, :3]  # Drop alpha channel if there is one.
        faces = self.model.get(cv2.cvtColor(img, cv2.COLOR_RGB2BGR))
        face = InsightFaceModel.pick_largest_face(faces)
        return torch.from_numpy(face.normed_embedding).unsqueeze(0), False
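
    # Editor's note: `run_model` yields the L2-normalized face embedding as a
    # (1, 512) tensor for buffalo_l, with is_image=False; `run_model_instant_id`
    # below returns either a keypoint image or the raw (unnormalized) embedding.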
    def run_model_instant_id(
        self,
        img: np.ndarray,
        res: int = 512,
        return_keypoints: bool = False,
        **kwargs
    ) -> Tuple[Union[np.ndarray, torch.Tensor], bool]:
        """Run the insightface model for instant_id.

        Arguments:
            - img: Input image of any size.
            - res: Resolution used to resize the image.
            - return_keypoints: Whether to return the keypoints image or the face embedding.
        """
        def draw_kps(img: np.ndarray, kps, color_list=[(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255)]):
            stickwidth = 4
            limbSeq = np.array([[0, 2], [1, 2], [3, 2], [4, 2]])
            kps = np.array(kps)

            h, w, _ = img.shape
            out_img = np.zeros([h, w, 3])

            for i in range(len(limbSeq)):
                index = limbSeq[i]
                color = color_list[index[0]]

                x = kps[index][:, 0]
                y = kps[index][:, 1]
                length = ((x[0] - x[1]) ** 2 + (y[0] - y[1]) ** 2) ** 0.5
                angle = math.degrees(math.atan2(y[0] - y[1], x[0] - x[1]))
                polygon = cv2.ellipse2Poly((int(np.mean(x)), int(np.mean(y))), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
                out_img = cv2.fillConvexPoly(out_img.copy(), polygon, color)
            out_img = (out_img * 0.6).astype(np.uint8)

            for idx_kp, kp in enumerate(kps):
                color = color_list[idx_kp]
                x, y = kp
                out_img = cv2.circle(out_img.copy(), (int(x), int(y)), 10, color, -1)

            return out_img.astype(np.uint8)

        if not self.antelopev2_installed:
            self.install_antelopev2()
        self.load_model()
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        img, remove_pad = resize_image_with_pad(img, res)
        faces = self.model.get(img)
        face_info = InsightFaceModel.pick_largest_face(faces)
        if return_keypoints:
            return remove_pad(draw_kps(img, face_info['kps'])), True
        else:
            return torch.from_numpy(face_info['embedding']), False
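
# Usage sketch (editor's note): instant_id needs both outputs of the same model,
#   kps_img, _ = g_insight_face_instant_id_model.run_model_instant_id(img, return_keypoints=True)
#   embed, _ = g_insight_face_instant_id_model.run_model_instant_id(img)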

g_insight_face_model = InsightFaceModel()
g_insight_face_instant_id_model = InsightFaceModel(face_analysis_model_name="antelopev2")

@dataclass
class FaceIdPlusInput:
    face_embed: torch.Tensor
    clip_embed: CLIPVisionModelOutput


def face_id_plus(img, low_vram=False, **kwargs):
    """FaceID plus uses both the face embedding from insightface and the CLIP
    embedding from the clip_h encoder."""
    face_embed, _ = g_insight_face_model.run_model(img)
    clip_embed, _ = clip(img, config='clip_h', low_vram=low_vram)
    return FaceIdPlusInput(face_embed, clip_embed), False
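
# Editor's note: FaceIdPlusInput relies on @dataclass to generate the
# (face_embed, clip_embed) constructor used above; `dataclass` is imported at
# the top of this file.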

class HandRefinerModel:
    def __init__(self):
        self.model = None
        self.device = devices.get_device_for("controlnet")

    def load_model(self):
        if self.model is None:
            from annotator.annotator_path import models_path
            from hand_refiner import MeshGraphormerDetector  # installed via hand_refiner_portable
            with Extra(torch_handler):
                self.model = MeshGraphormerDetector.from_pretrained(
                    "hr16/ControlNet-HandRefiner-pruned",
                    cache_dir=os.path.join(models_path, "hand_refiner"),
                    device=self.device,
                )
        else:
            self.model.to(self.device)

    def unload(self):
        if self.model is not None:
            self.model.to("cpu")

    def run_model(self, img, res=512, **kwargs):
        img, remove_pad = resize_image_with_pad(img, res)
        self.load_model()
        with Extra(torch_handler):
            depth_map, mask, info = self.model(
                img, output_type="np",
                detect_resolution=res,
                mask_bbox_padding=30,
            )
        return remove_pad(depth_map), True


g_hand_refiner_model = HandRefinerModel()
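

if __name__ == "__main__":
    # Smoke test (editor's sketch, not part of the original file): exercises
    # only the model-free preprocessors, so no checkpoints are downloaded.
    # Assumes this module's imports (modules.*, annotator.*, scripts.*) resolve,
    # i.e. it is run inside the webui environment.
    demo = np.random.randint(0, 256, size=(480, 640, 3), dtype=np.uint8)
    for name, fn in (
        ("canny", canny),
        ("scribble_xdog", scribble_xdog),
        ("threshold", threshold),
        ("invert", invert),
        ("recolor_luminance", recolor_luminance),
        ("blur_gaussian", blur_gaussian),
    ):
        out, is_image = fn(demo)
        print(f"{name}: shape={out.shape}, dtype={out.dtype}, is_image={is_image}")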