Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,142 +1,114 @@
|
|
1 |
import gradio as gr
|
2 |
import numpy as np
|
3 |
-
import random
|
4 |
-
|
5 |
-
from diffusers import DiffusionPipeline
|
6 |
import torch
|
7 |
|
8 |
-
|
9 |
-
|
|
|
10 |
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
|
|
15 |
|
16 |
-
pipe = DiffusionPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
|
17 |
-
pipe = pipe.to(device)
|
18 |
|
19 |
-
|
20 |
-
MAX_IMAGE_SIZE = 1024
|
21 |
|
22 |
-
|
23 |
-
|
24 |
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
height = height,
|
37 |
-
generator = generator
|
38 |
-
).images[0]
|
39 |
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
max-width: 640px;
|
52 |
-
}
|
53 |
-
"""
|
54 |
-
|
55 |
-
with gr.Blocks(css=css) as demo:
|
56 |
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
placeholder="Enter your prompt",
|
69 |
-
container=False,
|
70 |
-
)
|
71 |
-
|
72 |
-
run_button = gr.Button("Run", scale=0)
|
73 |
-
|
74 |
-
result = gr.Image(label="Result", show_label=False)
|
75 |
-
|
76 |
-
with gr.Accordion("Advanced Settings", open=False):
|
77 |
-
|
78 |
-
negative_prompt = gr.Text(
|
79 |
-
label="Negative prompt",
|
80 |
-
max_lines=1,
|
81 |
-
placeholder="Enter a negative prompt",
|
82 |
-
visible=False,
|
83 |
-
)
|
84 |
-
|
85 |
-
seed = gr.Slider(
|
86 |
-
label="Seed",
|
87 |
-
minimum=0,
|
88 |
-
maximum=MAX_SEED,
|
89 |
-
step=1,
|
90 |
-
value=0,
|
91 |
-
)
|
92 |
-
|
93 |
-
randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
|
94 |
-
|
95 |
-
with gr.Row():
|
96 |
-
|
97 |
-
width = gr.Slider(
|
98 |
-
label="Width",
|
99 |
-
minimum=256,
|
100 |
-
maximum=MAX_IMAGE_SIZE,
|
101 |
-
step=32,
|
102 |
-
value=1024, #Replace with defaults that work for your model
|
103 |
-
)
|
104 |
-
|
105 |
-
height = gr.Slider(
|
106 |
-
label="Height",
|
107 |
-
minimum=256,
|
108 |
-
maximum=MAX_IMAGE_SIZE,
|
109 |
-
step=32,
|
110 |
-
value=1024, #Replace with defaults that work for your model
|
111 |
-
)
|
112 |
-
|
113 |
-
with gr.Row():
|
114 |
-
|
115 |
-
guidance_scale = gr.Slider(
|
116 |
-
label="Guidance scale",
|
117 |
-
minimum=0.0,
|
118 |
-
maximum=10.0,
|
119 |
-
step=0.1,
|
120 |
-
value=0.0, #Replace with defaults that work for your model
|
121 |
-
)
|
122 |
-
|
123 |
-
num_inference_steps = gr.Slider(
|
124 |
-
label="Number of inference steps",
|
125 |
-
minimum=1,
|
126 |
-
maximum=50,
|
127 |
-
step=1,
|
128 |
-
value=2, #Replace with defaults that work for your model
|
129 |
-
)
|
130 |
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
)
|
141 |
-
|
142 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
import numpy as np
|
3 |
+
#import random
|
4 |
+
import spaces #[uncomment to use ZeroGPU]
|
5 |
+
#from diffusers import DiffusionPipeline
|
6 |
import torch
|
7 |
|
8 |
+
from diffusers import AutoPipelineForInpainting
|
9 |
+
from diffusers.utils import load_image
|
10 |
+
from transformers import AutoModelForMaskGeneration, AutoProcessor, pipeline
|
11 |
|
12 |
+
#import cv2
|
13 |
+
#import matplotlib.pyplot as plt
|
14 |
+
from PIL import Image
|
15 |
+
import os
|
16 |
+
import gc
|
17 |
|
|
|
|
|
18 |
|
19 |
+
# Run on the GPU when torch reports one, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Checkpoints for text-prompted detection (Grounding DINO) and
# box-prompted segmentation (SAM).
GDINO_MODEL_NAME = "IDEA-Research/grounding-dino-tiny"
SAM_MODEL_NAME = "facebook/sam-vit-base"

GDINO = pipeline(model=GDINO_MODEL_NAME, task="zero-shot-object-detection", device=DEVICE)
SAM = AutoModelForMaskGeneration.from_pretrained(SAM_MODEL_NAME).to(DEVICE)
SAM_PROCESSOR = AutoProcessor.from_pretrained(SAM_MODEL_NAME)

# SDXL inpainting pipeline. Half precision is only requested on CUDA;
# on the CPU fallback the weights are kept in float32.
SD_MODEL = "diffusers/stable-diffusion-xl-1.0-inpainting-0.1"
SD_DTYPE = torch.float16 if DEVICE == "cuda" else torch.float32
SD_PIPLINE = AutoPipelineForInpainting.from_pretrained(SD_MODEL, torch_dtype=SD_DTYPE).to(DEVICE)

# IP-Adapter lets the garment picture condition the inpainting.
IP_ADAPTER = "h94/IP-Adapter"
SUB_FOLDER = "sdxl_models"
IP_WEIGHT_NAME = "ip-adapter_sdxl.bin"
SD_PIPLINE.load_ip_adapter(IP_ADAPTER, subfolder=SUB_FOLDER, weight_name=IP_WEIGHT_NAME)
IP_SCALE = 0.6
SD_PIPLINE.set_ip_adapter_scale(IP_SCALE)

# Number of denoising steps passed to the inpainting pipeline.
GEN_STEPS = 100
|
38 |
+
|
39 |
+
|
40 |
+
def refine_masks(masks: torch.BoolTensor) -> np.ndarray:
    """Average the candidate masks of each box into one soft mask.

    Args:
        masks: 4-D tensor whose axis 1 holds the alternative masks that
            are averaged away; axes 0, 2 and 3 are kept in order.

    Returns:
        A float NumPy array with axis 1 of ``masks`` removed. Each value
        is the fraction of candidates that marked that pixel.
    """
    # Reducing over axis 1 directly is equivalent to moving it last and
    # averaging over the last axis.
    return masks.float().mean(dim=1).cpu().numpy()
|
44 |
+
|
45 |
+
|
46 |
+
def get_boxes(detections: list) -> list:
    """Convert detector results into a one-image batch of corner boxes.

    Args:
        detections: Sequence of dicts, each with a ``'box'`` mapping that
            has ``'xmin'``, ``'ymin'``, ``'xmax'`` and ``'ymax'`` entries.

    Returns:
        A single-element list wrapping ``[xmin, ymin, xmax, ymax]`` lists,
        one per detection and in the same order. An empty ``detections``
        yields ``[[]]``.
    """
    corners = ('xmin', 'ymin', 'xmax', 'ymax')
    # The extra outer list is the batch dimension (one image).
    return [[[det['box'][key] for key in corners] for det in detections]]
|
52 |
+
|
53 |
+
|
54 |
+
def get_mask(img: Image, prompt: str, d_model: pipeline, s_model: AutoModelForMaskGeneration,
             s_processor: AutoProcessor, device: str, threshold: float = 0.3) -> np.ndarray:
    """Detect 'face' and ``prompt`` in ``img`` and segment every detection.

    Args:
        img: Image handed to both the detector and the mask processor.
        prompt: Free-text class of the garment to locate.
        d_model: Zero-shot detection callable accepting
            ``candidate_labels`` and ``threshold``.
        s_model: Mask-generation model invoked on the processor output.
        s_processor: Processor that prepares box prompts and
            post-processes the predicted masks.
        device: Device the processor tensors are moved to; it should
            match the device ``s_model`` lives on.
        threshold: Score threshold forwarded to the detector.

    Returns:
        One soft mask per detection, in detector output order, as
        produced by ``refine_masks``.

    Raises:
        ValueError: If the detector returns no detections.
    """
    # Every label is terminated with a period before being sent to the
    # detector (presumably the format Grounding DINO expects — confirm).
    labels = [label if label.endswith(".") else label + "." for label in ("face", prompt)]
    detections = d_model(img, candidate_labels=labels, threshold=threshold)
    if not detections:
        raise ValueError(
            f"No objects detected for labels {labels} at threshold {threshold}."
        )

    boxes = get_boxes(detections)
    # Use the caller-supplied device rather than a module-level global.
    inputs = s_processor(images=img, input_boxes=boxes, return_tensors="pt").to(device)
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        outputs = s_model(**inputs)

    # [0] selects the masks of the single image in the batch.
    masks = s_processor.post_process_masks(
        masks=outputs.pred_masks,
        original_sizes=inputs.original_sizes,
        reshaped_input_sizes=inputs.reshaped_input_sizes,
    )[0]

    return refine_masks(masks)
|
71 |
+
|
72 |
+
|
73 |
+
def generate_result(model_img: Image.Image, cloth_img: Image.Image,
                    masks: np.ndarray, prompt: str, sd_pipline: AutoPipelineForInpainting,
                    n_steps: int = 100, seed: int = 4) -> Image.Image:
    """Inpaint the garment region of ``model_img`` guided by ``cloth_img``.

    Args:
        model_img: Image of the person; its ``size`` fixes the output size.
        cloth_img: Garment image passed as the IP-Adapter reference.
        masks: Indexable collection of masks; the entry at index 1 is
            used as the inpainting mask.
        prompt: Text prompt for the inpainting pipeline.
        sd_pipline: Inpainting pipeline to call.
        n_steps: Number of inference steps.
        seed: Seed for the CPU random generator (4 reproduces the
            previously hard-coded value).

    Returns:
        The first generated image, resized to the size of ``model_img``.

    Raises:
        IndexError: If ``masks`` holds fewer than two entries.
    """
    if len(masks) < 2:
        raise IndexError(
            f"Expected at least 2 masks (face and cloth), got {len(masks)}."
        )

    width, height = model_img.size

    # NOTE(review): index 1 is assumed to be the garment mask; this relies
    # on the order in which the detector returned its boxes — confirm.
    cloth_mask = masks[1]
    generator = torch.Generator(device="cpu").manual_seed(seed)
    images = sd_pipline(
        prompt=prompt,
        image=model_img,
        mask_image=cloth_mask,
        ip_adapter_image=cloth_img,
        generator=generator,
        num_inference_steps=n_steps,
    ).images

    # The pipeline output is resized back to the input image's size.
    return images[0].resize((width, height))
|
91 |
+
|
92 |
+
|
93 |
+
@spaces.GPU
def run(model_img: Image.Image, cloth_img: Image.Image, cloth_class: str,
        close_description: str) -> Image.Image:
    """Gradio handler: segment the garment on the model and inpaint it.

    Args:
        model_img: Uploaded picture of the person.
        cloth_img: Uploaded picture of the garment.
        cloth_class: Text class used to detect the garment in ``model_img``.
        close_description: Prompt forwarded to the inpainting pipeline.

    Returns:
        The generated try-on image.

    Raises:
        gr.Error: If either image is missing.
    """
    if model_img is None or cloth_img is None:
        raise gr.Error("Please upload both a model image and a cloth image.")

    try:
        masks = get_mask(model_img, cloth_class, GDINO, SAM, SAM_PROCESSOR, DEVICE)
        return generate_result(model_img, cloth_img, masks, close_description,
                               SD_PIPLINE, GEN_STEPS)
    finally:
        # Release memory even when detection or generation raised.
        gc.collect()
        torch.cuda.empty_cache()
|
100 |
+
|
101 |
+
|
102 |
+
# Assemble the try-on UI: two uploaded images and two text fields in,
# one image out, all routed through `run`.
demo = gr.Interface(
    run,
    inputs=[
        gr.Image(sources='upload', label='Model image', type='pil'),
        gr.Image(sources='upload', label='Cloth image', type='pil'),
        gr.Textbox(label='Cloth class'),
        gr.Textbox(label='Close description'),
    ],
    outputs=[gr.Image()],
    title='Virtual Try-On',
)

# Start the server with the same flags as before.
demo.launch(debug=True, share=True)
|