Delete app-backup.py
Browse files- app-backup.py +0 -351
app-backup.py
DELETED
@@ -1,351 +0,0 @@
|
|
1 |
-
import spaces
|
2 |
-
import random
|
3 |
-
import torch
|
4 |
-
import cv2
|
5 |
-
import gradio as gr
|
6 |
-
import numpy as np
|
7 |
-
from huggingface_hub import snapshot_download
|
8 |
-
from transformers import CLIPVisionModelWithProjection,CLIPImageProcessor
|
9 |
-
from diffusers.utils import load_image
|
10 |
-
from kolors.pipelines.pipeline_controlnet_xl_kolors_img2img import StableDiffusionXLControlNetImg2ImgPipeline
|
11 |
-
from kolors.models.modeling_chatglm import ChatGLMModel
|
12 |
-
from kolors.models.tokenization_chatglm import ChatGLMTokenizer
|
13 |
-
from kolors.models.controlnet import ControlNetModel
|
14 |
-
from diffusers import AutoencoderKL
|
15 |
-
from kolors.models.unet_2d_condition import UNet2DConditionModel
|
16 |
-
from diffusers import EulerDiscreteScheduler
|
17 |
-
from PIL import Image
|
18 |
-
from annotator.midas import MidasDetector
|
19 |
-
from annotator.dwpose import DWposeDetector
|
20 |
-
from annotator.util import resize_image, HWC3
|
21 |
-
|
22 |
-
|
23 |
-
# All models below are moved to this device.
device = "cuda"

# Download (or reuse the cached copy of) the base Kolors checkpoint and the
# three ControlNet checkpoints; the returned values are used as directory
# paths by the from_pretrained() calls below.
ckpt_dir = snapshot_download(repo_id="Kwai-Kolors/Kolors")
ckpt_dir_depth = snapshot_download(repo_id="Kwai-Kolors/Kolors-ControlNet-Depth")
ckpt_dir_canny = snapshot_download(repo_id="Kwai-Kolors/Kolors-ControlNet-Canny")
ckpt_dir_pose = snapshot_download(repo_id="Kwai-Kolors/Kolors-ControlNet-Pose")

# Components of the base checkpoint. Each model is cast with .half() and moved
# to `device`; the tokenizer and scheduler are loaded as-is.
text_encoder = ChatGLMModel.from_pretrained(
    f'{ckpt_dir}/text_encoder',
    torch_dtype=torch.float16,
).half().to(device)
tokenizer = ChatGLMTokenizer.from_pretrained(f'{ckpt_dir}/text_encoder')
vae = AutoencoderKL.from_pretrained(
    f"{ckpt_dir}/vae",
    revision=None,
).half().to(device)
scheduler = EulerDiscreteScheduler.from_pretrained(f"{ckpt_dir}/scheduler")
unet = UNet2DConditionModel.from_pretrained(
    f"{ckpt_dir}/unet",
    revision=None,
).half().to(device)

# One ControlNet per conditioning type (depth, canny edges, pose).
controlnet_depth = ControlNetModel.from_pretrained(
    f"{ckpt_dir_depth}",
    revision=None,
).half().to(device)
controlnet_canny = ControlNetModel.from_pretrained(
    f"{ckpt_dir_canny}",
    revision=None,
).half().to(device)
controlnet_pose = ControlNetModel.from_pretrained(
    f"{ckpt_dir_pose}",
    revision=None,
).half().to(device)
|
37 |
-
|
38 |
-
def _build_pipeline(controlnet):
    """Assemble an img2img ControlNet pipeline around the shared components.

    Only the ControlNet differs between pipelines; the VAE, text encoder,
    tokenizer, UNet and scheduler objects are the module-level instances and
    are therefore shared by every pipeline built here.
    """
    return StableDiffusionXLControlNetImg2ImgPipeline(
        vae=vae,
        controlnet=controlnet,
        text_encoder=text_encoder,
        tokenizer=tokenizer,
        unet=unet,
        scheduler=scheduler,
        force_zeros_for_empty_prompt=False,
    )


# One pipeline per conditioning type, created in the same order as before.
pipe_depth = _build_pipeline(controlnet_depth)
pipe_canny = _build_pipeline(controlnet_canny)
pipe_pose = _build_pipeline(controlnet_pose)
|
67 |
-
|
68 |
-
@spaces.GPU
def process_canny_condition(image, canny_threods=(100, 200)):
    """Build a 3-channel Canny edge map to use as the ControlNet condition.

    Args:
        image: Input image as a NumPy array accepted by ``cv2.Canny``.
        canny_threods: Pair ``(low, high)`` of hysteresis thresholds passed to
            ``cv2.Canny``. The default is an immutable tuple so it cannot be
            shared and mutated across calls; any indexable pair still works.

    Returns:
        A ``PIL.Image`` holding the edge map replicated over three channels.
    """
    low, high = canny_threods[0], canny_threods[1]
    # Work on a copy so the caller's array is never touched.
    edges = cv2.Canny(image.copy(), low, high)
    # cv2.Canny yields a single-channel map; replicate it to H x W x 3.
    edges = edges[:, :, None]
    edges = np.concatenate([edges, edges, edges], axis=2)
    edges = HWC3(edges)
    return Image.fromarray(edges)
|
76 |
-
|
77 |
-
# Detector instance created once at import time and reused by every call.
model_midas = MidasDetector()


@spaces.GPU
def process_depth_condition_midas(img, res=1024):
    """Produce a depth-map condition image with the MiDaS detector.

    Args:
        img: Input image as a 3-dimensional NumPy array (its shape is
            unpacked into height, width and one more axis).
        res: Resolution handed to ``resize_image`` before detection.

    Returns:
        A ``PIL.Image`` of the detector output, resized back to the input's
        original width and height.
    """
    orig_h, orig_w, _ = img.shape
    # Run the detector on the resized image ...
    depth_map = HWC3(model_midas(resize_image(HWC3(img), res)))
    # ... then bring the result back to the original size.
    restored = cv2.resize(depth_map, (orig_w, orig_h))
    return Image.fromarray(restored)
|
85 |
-
|
86 |
-
# Detector instance created once at import time and reused by every call.
model_dwpose = DWposeDetector()


@spaces.GPU
def process_dwpose_condition(image, res=1024):
    """Produce a pose condition image with the DWpose detector.

    Args:
        image: Input image as a 3-dimensional NumPy array (its shape is
            unpacked into height, width and one more axis).
        res: Resolution handed to ``resize_image`` before detection.

    Returns:
        A ``PIL.Image`` of the rendered pose, resized back to the input's
        original width and height.
    """
    h, w, _ = image.shape
    img = resize_image(HWC3(image), res)
    # Run the detector on the resized image. Previously the resized copy was
    # computed and then discarded (the raw `image` was passed instead), which
    # made `res` a no-op and differed from the depth preprocessor.
    _, out_img = model_dwpose(img)
    result = HWC3(out_img)
    result = cv2.resize(result, (w, h))
    return Image.fromarray(result)
|
95 |
-
|
96 |
-
# Largest value of a signed 32-bit integer; used as the upper bound for seeds.
MAX_SEED = np.iinfo(np.int32).max
# Resolution passed to resize_image() and to the depth/pose preprocessors.
MAX_IMAGE_SIZE = 1024
|
98 |
-
|
99 |
-
# Default negative prompt shared by the three inference entry points.
_DEFAULT_NEGATIVE_PROMPT = "nsfw,脸部阴影,低分辨率,jpeg伪影、模糊、糟糕,黑脸,霓虹灯"


def _run_controlnet(pipe, make_condition, prompt, image, negative_prompt, seed,
                    randomize_seed, guidance_scale, num_inference_steps,
                    controlnet_conditioning_scale, control_guidance_end,
                    strength):
    """Shared body of the infer_* entry points.

    Args:
        pipe: Pipeline to run (one of the module-level ``pipe_*`` objects).
        make_condition: Callable taking the resized image as a NumPy array
            and returning the ControlNet condition image.
        Remaining arguments: forwarded unchanged from the public entry point.

    Returns:
        ``([condition_image, generated_image], seed)`` where ``seed`` is the
        seed actually used.

    Raises:
        gr.Error: If no input image was supplied.
    """
    if image is None:
        # Without this guard the failure surfaces deep inside resize_image().
        raise gr.Error("이미지를 업로드해 주세요.")
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # manual_seed() needs an int; coerce in case the caller supplies a
    # numeric value of another type.
    seed = int(seed)
    generator = torch.Generator().manual_seed(seed)
    init_image = resize_image(image, MAX_IMAGE_SIZE)
    pipe = pipe.to("cuda")
    condi_img = make_condition(np.array(init_image))
    generated = pipe(
        prompt=prompt,
        image=init_image,
        controlnet_conditioning_scale=controlnet_conditioning_scale,
        control_guidance_end=control_guidance_end,
        strength=strength,
        control_image=condi_img,
        negative_prompt=negative_prompt,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
        num_images_per_prompt=1,
        generator=generator,
    ).images[0]
    return [condi_img, generated], seed


@spaces.GPU
def infer_depth(prompt,
                image = None,
                negative_prompt = _DEFAULT_NEGATIVE_PROMPT,
                seed = 397886929,
                randomize_seed = False,
                guidance_scale = 6.0,
                num_inference_steps = 50,
                controlnet_conditioning_scale = 0.7,
                control_guidance_end = 0.9,
                strength = 1.0
                ):
    """Generate an image conditioned on a MiDaS depth map of ``image``.

    Returns ``([depth_condition, generated_image], seed_used)``.
    """
    return _run_controlnet(
        pipe_depth,
        lambda arr: process_depth_condition_midas(arr, MAX_IMAGE_SIZE),
        prompt, image, negative_prompt, seed, randomize_seed,
        guidance_scale, num_inference_steps,
        controlnet_conditioning_scale, control_guidance_end, strength,
    )


@spaces.GPU
def infer_canny(prompt,
                image = None,
                negative_prompt = _DEFAULT_NEGATIVE_PROMPT,
                seed = 397886929,
                randomize_seed = False,
                guidance_scale = 6.0,
                num_inference_steps = 50,
                controlnet_conditioning_scale = 0.7,
                control_guidance_end = 0.9,
                strength = 1.0
                ):
    """Generate an image conditioned on a Canny edge map of ``image``.

    Returns ``([canny_condition, generated_image], seed_used)``.
    """
    return _run_controlnet(
        pipe_canny,
        lambda arr: process_canny_condition(arr),
        prompt, image, negative_prompt, seed, randomize_seed,
        guidance_scale, num_inference_steps,
        controlnet_conditioning_scale, control_guidance_end, strength,
    )


@spaces.GPU
def infer_pose(prompt,
               image = None,
               negative_prompt = _DEFAULT_NEGATIVE_PROMPT,
               seed = 66,
               randomize_seed = False,
               guidance_scale = 6.0,
               num_inference_steps = 50,
               controlnet_conditioning_scale = 0.7,
               control_guidance_end = 0.9,
               strength = 1.0
               ):
    """Generate an image conditioned on a DWpose rendering of ``image``.

    Returns ``([pose_condition, generated_image], seed_used)``.
    """
    return _run_controlnet(
        pipe_pose,
        lambda arr: process_dwpose_condition(arr, MAX_IMAGE_SIZE),
        prompt, image, negative_prompt, seed, randomize_seed,
        guidance_scale, num_inference_steps,
        controlnet_conditioning_scale, control_guidance_end, strength,
    )
|
197 |
-
|
198 |
-
|
199 |
-
# Example rows for gr.Examples: each row is [prompt, image path], matching the
# [prompt, image] inputs the examples are bound to.
canny_examples = [
    [
        "아름다운 소녀, 고품질, 매우 선명, 생생한 색상, 초고해상도, 최상의 품질, 8k, 고화질, 4K",
        "image/woman_1.png",
    ],
    [
        "파노라마, 컵 안에 앉아있는 귀여운 흰 강아지, 카메라를 바라보는, 애니메이션 스타일, 3D 렌더링, 옥테인 렌더",
        "image/dog.png",
    ],
]

depth_examples = [
    [
        "신카이 마코토 스타일, 풍부한 색감, 초록 셔츠를 입은 여성이 들판에 서 있는, 아름다운 풍경, 맑고 밝은, 얼룩진 빛과 그림자, 최고의 품질, 초세밀, 8K 화질",
        "image/woman_2.png",
    ],
    [
        "화려한 색상의 작은 새, 고품질, 매우 선명, 생생한 색상, 초고해상도, 최상의 품질, 8k, 고화질, 4K",
        "image/bird.png",
    ],
]

pose_examples = [
    [
        "보라색 퍼프 슬리브 드레스를 입고 왕관과 흰색 레이스 장갑을 낀 소녀가 양 손으로 얼굴을 감싸고 있는, 고품질, 매우 선명, 생생한 색상, 초고해상도, 최상의 품질, 8k, 고화질, 4K",
        "image/woman_3.png",
    ],
    [
        "검은색 스포츠 재킷과 흰색 이너를 입고 목걸이를 한 여성이 거리에 서 있는, 배경은 빨간 건물과 녹색 나무, 고품질, 매우 선명, 생생한 색상, 초고해상도, 최상의 품질, 8k, 고화질, 4K",
        "image/woman_4.png",
    ],
]
|
219 |
-
|
220 |
-
# Custom stylesheet handed to gr.Blocks: hides the page's <footer> element.
css = """
footer {
    visibility: hidden;
}
"""
|
225 |
-
|
226 |
-
|
227 |
-
def load_description(fp):
    """Return the full text of the UTF-8 encoded file at path ``fp``."""
    with open(fp, mode='r', encoding='utf-8') as handle:
        return handle.read()
|
231 |
-
|
232 |
-
# Gradio UI: a left column with the inputs and the three run buttons, a right
# column with the outputs, then example tables and the button event wiring.
# NOTE(review): the original indentation is not visible in this view; the
# nesting below is reconstructed from the statement order — confirm.
with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css) as Kolors:
    with gr.Row():
        with gr.Column(elem_id="col-left"):
            with gr.Row():
                prompt = gr.Textbox(
                    label="프롬프트",
                    placeholder="프롬프트를 입력하세요",
                    lines=2,
                )
            with gr.Row():
                image = gr.Image(label="이미지", type="pil")
            with gr.Accordion("고급 설정", open=False):
                negative_prompt = gr.Textbox(
                    label="네거티브 프롬프트",
                    placeholder="네거티브 프롬프트를 입력하세요",
                    visible=True,
                    value="nsfw, 얼굴 그림자, 저해상도, jpeg 아티팩트, 흐릿함, 열악함, 검은 얼굴, 네온 조명",
                )
                seed = gr.Slider(label="시드", minimum=0, maximum=MAX_SEED, step=1, value=0)
                randomize_seed = gr.Checkbox(label="시드 무작위화", value=True)
                with gr.Row():
                    guidance_scale = gr.Slider(
                        label="가이던스 스케일", minimum=0.0, maximum=10.0, step=0.1, value=6.0,
                    )
                    num_inference_steps = gr.Slider(
                        label="추론 단계 수", minimum=10, maximum=50, step=1, value=30,
                    )
                with gr.Row():
                    controlnet_conditioning_scale = gr.Slider(
                        label="컨트롤넷 컨디셔닝 스케일", minimum=0.0, maximum=1.0, step=0.1, value=0.7,
                    )
                    control_guidance_end = gr.Slider(
                        label="컨트롤 가이던스 종료", minimum=0.0, maximum=1.0, step=0.1, value=0.9,
                    )
                with gr.Row():
                    strength = gr.Slider(
                        label="강도", minimum=0.0, maximum=1.0, step=0.1, value=1.0,
                    )
            with gr.Row():
                canny_button = gr.Button("캐니", elem_id="button")
                depth_button = gr.Button("깊이", elem_id="button")
                pose_button = gr.Button("포즈", elem_id="button")

        with gr.Column(elem_id="col-right"):
            result = gr.Gallery(label="결과", show_label=False, columns=2)
            seed_used = gr.Number(label="사용된 시드")

    # One example table per conditioning type, in the order Canny, Depth, Pose.
    # Examples only fill in the prompt and the image.
    for _example_fn, _example_rows, _example_label in (
        (infer_canny, canny_examples, "Canny"),
        (infer_depth, depth_examples, "Depth"),
        (infer_pose, pose_examples, "Pose"),
    ):
        with gr.Row():
            gr.Examples(
                fn=_example_fn,
                examples=_example_rows,
                inputs=[prompt, image],
                outputs=[result, seed_used],
                label=_example_label,
            )

    # Each button passes every control, in the positional order of the
    # infer_* signatures, and writes to the gallery and the seed field.
    for _button, _handler in (
        (canny_button, infer_canny),
        (depth_button, infer_depth),
        (pose_button, infer_pose),
    ):
        _button.click(
            fn=_handler,
            inputs=[
                prompt, image, negative_prompt, seed, randomize_seed,
                guidance_scale, num_inference_steps,
                controlnet_conditioning_scale, control_guidance_end, strength,
            ],
            outputs=[result, seed_used],
        )

# Enable request queuing and start the app.
Kolors.queue().launch(debug=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|