Omer Karisman committed
Commit: 588d8f0
Parent(s): 8daac10

zero gpu

Files changed:
- app.py +13 -13
- omni_zero.py +148 -14
app.py
CHANGED
@@ -1,6 +1,8 @@
+import os
+
 import gradio as gr
 import spaces
-
+
 os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
 
 import torch
@@ -9,22 +11,19 @@ import torch
 torch.jit.script = lambda f: f
 ####
 
-
+import cv2
+import numpy as np
+import PIL
+from controlnet_aux import ZoeDetector
 from diffusers import DPMSolverMultistepScheduler
-from diffusers.models import ControlNetModel
 from diffusers.image_processor import IPAdapterMaskProcessor
-
-from …
-
-from pipeline import OmniZeroPipeline
+from diffusers.models import ControlNetModel
+from huggingface_hub import snapshot_download
 from insightface.app import FaceAnalysis
-from …
-from …
-
-import cv2
-import numpy as np
+from pipeline import OmniZeroPipeline
+from transformers import CLIPVisionModelWithProjection
+from utils import align_images, draw_kps, load_and_resize_image
 
-import PIL
 
 def patch_onnx_runtime(
     self,
@@ -33,6 +32,7 @@ def patch_onnx_runtime(
     omp_num_threads: int = 16,
 ):
     import os
+
     import onnxruntime as ort
 
     os.environ["OMP_NUM_THREADS"] = str(omp_num_threads)
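Note: the commit message "zero gpu" most likely refers to Hugging Face ZeroGPU Spaces; app.py already imports the `spaces` package, but the decorator usage itself is not visible in the hunks above. A minimal sketch of the usual ZeroGPU pattern, assuming the standard `spaces.GPU` decorator and a hypothetical `generate_fn` handler (neither is part of this diff):

import gradio as gr
import spaces

@spaces.GPU  # assumption: on ZeroGPU hardware the GPU is attached only for the duration of this call
def generate_fn(prompt):
    # heavy diffusion work would run here while the GPU is allocated
    ...

demo = gr.Interface(fn=generate_fn, inputs="text", outputs="image")
demo.launch()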
omni_zero.py
CHANGED
@@ -1,31 +1,164 @@
 import os
+
 os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
 
 import sys
+
 sys.path.insert(0, './diffusers/src')
 
+import cv2
+import numpy as np
+import PIL
 import torch
-
-
-from huggingface_hub import snapshot_download
+from controlnet_aux import ZoeDetector
 from diffusers import DPMSolverMultistepScheduler
-from diffusers.models import ControlNetModel
 from diffusers.image_processor import IPAdapterMaskProcessor
-
+from diffusers.models import ControlNetModel
+from huggingface_hub import snapshot_download
+from insightface.app import FaceAnalysis
+from pipeline import OmniZeroPipeline
 from transformers import CLIPVisionModelWithProjection
+from utils import align_images, draw_kps, load_and_resize_image
 
-from pipeline import OmniZeroPipeline
-from insightface.app import FaceAnalysis
-from controlnet_aux import ZoeDetector
-from utils import draw_kps, load_and_resize_image, align_images
 
-
+class OmniZeroSingle():
+    def __init__(self,
+        base_model="stabilityai/stable-diffusion-xl-base-1.0",
+        device="cuda",
+    ):
+        snapshot_download("okaris/antelopev2", local_dir="./models/antelopev2")
+        self.face_analysis = FaceAnalysis(name='antelopev2', root='./', providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
+        self.face_analysis.prepare(ctx_id=0, det_size=(640, 640))
 
-
-import numpy as np
-from torchvision.transforms import functional as TVF
+        dtype = torch.float16
 
-
+        ip_adapter_plus_image_encoder = CLIPVisionModelWithProjection.from_pretrained(
+            "h94/IP-Adapter",
+            subfolder="models/image_encoder",
+            torch_dtype=dtype,
+        ).to(device)
+
+        zoedepthnet_path = "okaris/zoe-depth-controlnet-xl"
+        zoedepthnet = ControlNetModel.from_pretrained(zoedepthnet_path,torch_dtype=dtype).to(device)
+
+        identitiynet_path = "okaris/face-controlnet-xl"
+        identitynet = ControlNetModel.from_pretrained(identitiynet_path, torch_dtype=dtype).to(device)
+
+        self.zoe_depth_detector = ZoeDetector.from_pretrained("lllyasviel/Annotators").to(device)
+
+        self.pipeline = OmniZeroPipeline.from_pretrained(
+            base_model,
+            controlnet=[identitynet, zoedepthnet],
+            torch_dtype=dtype,
+            image_encoder=ip_adapter_plus_image_encoder,
+        ).to(device)
+
+        config = self.pipeline.scheduler.config
+        config["timestep_spacing"] = "trailing"
+        self.pipeline.scheduler = DPMSolverMultistepScheduler.from_config(config, use_karras_sigmas=True, algorithm_type="sde-dpmsolver++", final_sigmas_type="zero")
+
+        self.pipeline.load_ip_adapter(["okaris/ip-adapter-instantid", "h94/IP-Adapter", "h94/IP-Adapter"], subfolder=[None, "sdxl_models", "sdxl_models"], weight_name=["ip-adapter-instantid.bin", "ip-adapter-plus_sdxl_vit-h.safetensors", "ip-adapter-plus_sdxl_vit-h.safetensors"])
+
+    def get_largest_face_embedding_and_kps(self, image, target_image=None):
+        face_info = self.face_analysis.get(cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR))
+        if len(face_info) == 0:
+            return None, None
+        largest_face = sorted(face_info, key=lambda x: x['bbox'][2] * x['bbox'][3], reverse=True)[0]
+        face_embedding = torch.tensor(largest_face['embedding']).to("cuda")
+        if target_image is None:
+            target_image = image
+        zeros = np.zeros((target_image.size[1], target_image.size[0], 3), dtype=np.uint8)
+        face_kps_image = draw_kps(zeros, largest_face['kps'])
+        return face_embedding, face_kps_image
+
+    def generate(self,
+        seed=42,
+        prompt="A person",
+        negative_prompt="blurry, out of focus",
+        guidance_scale=3.0,
+        number_of_images=1,
+        number_of_steps=10,
+        base_image=None,
+        base_image_strength=0.15,
+        composition_image=None,
+        composition_image_strength=1.0,
+        style_image=None,
+        style_image_strength=1.0,
+        identity_image=None,
+        identity_image_strength=1.0,
+        depth_image=None,
+        depth_image_strength=0.5,
+    ):
+        resolution = 1024
+
+        if base_image is not None:
+            base_image = load_and_resize_image(base_image, resolution, resolution)
+        else:
+            if composition_image is not None:
+                base_image = load_and_resize_image(composition_image, resolution, resolution)
+            else:
+                raise ValueError("You must provide a base image or a composition image")
+
+        if depth_image is None:
+            depth_image = self.zoe_depth_detector(base_image, detect_resolution=resolution, image_resolution=resolution)
+        else:
+            depth_image = load_and_resize_image(depth_image, resolution, resolution)
+
+        base_image, depth_image = align_images(base_image, depth_image)
+
+        if composition_image is not None:
+            composition_image = load_and_resize_image(composition_image, resolution, resolution)
+        else:
+            composition_image = base_image
+
+        if style_image is not None:
+            style_image = load_and_resize_image(style_image, resolution, resolution)
+        else:
+            raise ValueError("You must provide a style image")
+
+        if identity_image is not None:
+            identity_image = load_and_resize_image(identity_image, resolution, resolution)
+        else:
+            raise ValueError("You must provide an identity image")
+
+        face_embedding_identity_image, target_kps = self.get_largest_face_embedding_and_kps(identity_image, base_image)
+        if face_embedding_identity_image is None:
+            raise ValueError("No face found in the identity image, the image might be cropped too tightly or the face is too small")
+
+        face_embedding_base_image, face_kps_base_image = self.get_largest_face_embedding_and_kps(base_image)
+        if face_embedding_base_image is not None:
+            target_kps = face_kps_base_image
+
+        self.pipeline.set_ip_adapter_scale([identity_image_strength,
+            {
+                "down": { "block_2": [0.0, 0.0] },
+                "up": { "block_0": [0.0, style_image_strength, 0.0] }
+            },
+            {
+                "down": { "block_2": [0.0, composition_image_strength] },
+                "up": { "block_0": [0.0, 0.0, 0.0] }
+            }
+        ])
+
+        generator = torch.Generator(device="cpu").manual_seed(seed)
+
+        images = self.pipeline(
+            prompt=prompt,
+            negative_prompt=negative_prompt,
+            guidance_scale=guidance_scale,
+            ip_adapter_image=[face_embedding_identity_image, style_image, composition_image],
+            image=base_image,
+            control_image=[target_kps, depth_image],
+            controlnet_conditioning_scale=[identity_image_strength, depth_image_strength],
+            identity_control_indices=[(0,0)],
+            num_inference_steps=number_of_steps,
+            num_images_per_prompt=number_of_images,
+            strength=(1-base_image_strength),
+            generator=generator,
+            seed=seed,
+        ).images
+
+        return images
 
 class OmniZeroCouple():
     def __init__(self,
@@ -214,6 +347,7 @@ class OmniZeroCouple():
         omp_num_threads: int = 16,
     ):
         import os
+
        import onnxruntime as ort
 
        os.environ["OMP_NUM_THREADS"] = str(omp_num_threads)
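The OmniZeroSingle class introduced above combines identity, style, composition, and depth conditioning in a single pipeline call. A minimal usage sketch follows; the file paths are placeholders, and it assumes `load_and_resize_image` in utils accepts paths or URLs, which is not shown in this diff:

from omni_zero import OmniZeroSingle

omni = OmniZeroSingle(base_model="stabilityai/stable-diffusion-xl-base-1.0", device="cuda")

images = omni.generate(
    seed=42,
    prompt="A person",
    negative_prompt="blurry, out of focus",
    identity_image="person.jpg",          # face to preserve (placeholder path)
    style_image="style.jpg",              # style reference (placeholder path)
    composition_image="composition.jpg",  # layout reference, also used as the base image here
    guidance_scale=3.0,
    number_of_steps=10,
)
images[0].save("output.png")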