JackAILab committed on
Commit
865391b
·
verified ·
1 Parent(s): ae14a9d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +225 -146
app.py CHANGED
@@ -1,154 +1,233 @@
1
  import gradio as gr
2
- import numpy as np
3
- import random
4
-
5
- # import spaces #[uncomment to use ZeroGPU]
6
- from diffusers import DiffusionPipeline
7
  import torch
 
 
 
 
8
 
9
- device = "cuda" if torch.cuda.is_available() else "cpu"
10
- model_repo_id = "stabilityai/sdxl-turbo" # Replace to the model you would like to use
11
-
12
- if torch.cuda.is_available():
13
- torch_dtype = torch.float16
14
- else:
15
- torch_dtype = torch.float32
16
-
17
- pipe = DiffusionPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
18
- pipe = pipe.to(device)
19
-
20
- MAX_SEED = np.iinfo(np.int32).max
21
- MAX_IMAGE_SIZE = 1024
22
-
23
-
24
- # @spaces.GPU #[uncomment to use ZeroGPU]
25
- def infer(
26
- prompt,
27
- negative_prompt,
28
- seed,
29
- randomize_seed,
30
- width,
31
- height,
32
- guidance_scale,
33
- num_inference_steps,
34
- progress=gr.Progress(track_tqdm=True),
35
- ):
36
- if randomize_seed:
37
- seed = random.randint(0, MAX_SEED)
38
-
39
- generator = torch.Generator().manual_seed(seed)
40
-
41
- image = pipe(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  prompt=prompt,
43
- negative_prompt=negative_prompt,
44
- guidance_scale=guidance_scale,
45
- num_inference_steps=num_inference_steps,
46
- width=width,
47
  height=height,
 
 
 
 
 
 
48
  generator=generator,
 
 
49
  ).images[0]
50
 
51
- return image, seed
52
-
53
-
54
- examples = [
55
- "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
56
- "An astronaut riding a green horse",
57
- "A delicious ceviche cheesecake slice",
58
- ]
59
-
60
- css = """
61
- #col-container {
62
- margin: 0 auto;
63
- max-width: 640px;
64
- }
65
- """
66
-
67
- with gr.Blocks(css=css) as demo:
68
- with gr.Column(elem_id="col-container"):
69
- gr.Markdown(" # Text-to-Image Gradio Template")
70
-
71
- with gr.Row():
72
- prompt = gr.Text(
73
- label="Prompt",
74
- show_label=False,
75
- max_lines=1,
76
- placeholder="Enter your prompt",
77
- container=False,
78
- )
79
-
80
- run_button = gr.Button("Run", scale=0, variant="primary")
81
-
82
- result = gr.Image(label="Result", show_label=False)
83
-
84
- with gr.Accordion("Advanced Settings", open=False):
85
- negative_prompt = gr.Text(
86
- label="Negative prompt",
87
- max_lines=1,
88
- placeholder="Enter a negative prompt",
89
- visible=False,
90
- )
91
-
92
- seed = gr.Slider(
93
- label="Seed",
94
- minimum=0,
95
- maximum=MAX_SEED,
96
- step=1,
97
- value=0,
98
- )
99
-
100
- randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
101
-
102
- with gr.Row():
103
- width = gr.Slider(
104
- label="Width",
105
- minimum=256,
106
- maximum=MAX_IMAGE_SIZE,
107
- step=32,
108
- value=1024, # Replace with defaults that work for your model
109
- )
110
-
111
- height = gr.Slider(
112
- label="Height",
113
- minimum=256,
114
- maximum=MAX_IMAGE_SIZE,
115
- step=32,
116
- value=1024, # Replace with defaults that work for your model
117
- )
118
-
119
- with gr.Row():
120
- guidance_scale = gr.Slider(
121
- label="Guidance scale",
122
- minimum=0.0,
123
- maximum=10.0,
124
- step=0.1,
125
- value=0.0, # Replace with defaults that work for your model
126
- )
127
-
128
- num_inference_steps = gr.Slider(
129
- label="Number of inference steps",
130
- minimum=1,
131
- maximum=50,
132
- step=1,
133
- value=2, # Replace with defaults that work for your model
134
- )
135
-
136
- gr.Examples(examples=examples, inputs=[prompt])
137
- gr.on(
138
- triggers=[run_button.click, prompt.submit],
139
- fn=infer,
140
- inputs=[
141
- prompt,
142
- negative_prompt,
143
- seed,
144
- randomize_seed,
145
- width,
146
- height,
147
- guidance_scale,
148
- num_inference_steps,
149
- ],
150
- outputs=[result, seed],
151
- )
152
-
153
- if __name__ == "__main__":
154
- demo.launch()
 
1
  import gradio as gr
 
 
 
 
 
2
  import torch
3
+ import os
4
+ import glob
5
+ import spaces
6
+ import numpy as np
7
 
8
+ from datetime import datetime
9
+ from PIL import Image
10
+ from diffusers.utils import load_image
11
+ from diffusers import EulerDiscreteScheduler
12
+ from pipline_StableDiffusionXL_ConsistentID import ConsistentIDStableDiffusionXLPipeline
13
+ from huggingface_hub import hf_hub_download
14
+ ### Model can be imported from https://github.com/zllrunning/face-parsing.PyTorch?tab=readme-ov-file
15
+ ### We use the ckpt of 79999_iter.pth: https://drive.google.com/open?id=154JgKpzCPW82qINcVieuPH3fZ2e0P812
16
+ ### Thanks for the open source of face-parsing model.
17
+ from models.BiSeNet.model import BiSeNet
18
+
19
# The device name is hard-coded instead of being probed from a tensor: an
# earlier probe (`torch.Tensor([0]).cuda().device`) was annotated in this file
# as printing 'cpu' at this point.
device = "cuda"

# Absolute directory that contains this script.
script_directory = os.path.dirname(os.path.realpath(__file__))

# Base SDXL checkpoint, plus the ConsistentID weights fetched into the
# Hugging Face cache.
base_model_path = "SG161222/RealVisXL_V3.0"
consistentID_path = hf_hub_download(
    repo_id="JackAILab/ConsistentID",
    filename="ConsistentID_SDXL-v1.bin",
    repo_type="model",
)

# --- Base pipeline (fp16 variant, no safety checker), moved to `device` ---
pipe = ConsistentIDStableDiffusionXLPipeline.from_pretrained(
    base_model_path,
    torch_dtype=torch.float16,
    safety_checker=None,
    variant="fp16",
).to(device)

# --- BiSeNet face-parsing network ---
# The checkpoint is downloaded into ./checkpoints, deserialised onto the CPU
# (the original note on this line reads "device fail"), then moved with .cuda().
bise_net_cp_path = hf_hub_download(
    repo_id="JackAILab/ConsistentID",
    filename="face_parsing.pth",
    local_dir="./checkpoints",
)
bise_net = BiSeNet(n_classes=19)
bise_net.load_state_dict(torch.load(bise_net_cp_path, map_location="cpu"))
bise_net.cuda()

# --- LLaVA, used for prompt enhancement ---
# The vendored LLaVA checkout has to be on sys.path before `llava` is imported,
# which is why these imports sit here rather than at the top of the file.
import sys
sys.path.append("./models/LLaVA")
from llava.model.builder import load_pretrained_model
from llava.mm_utils import get_model_name_from_path
from llava.eval.run_llava import eval_model

llva_model_path = "liuhaotian/llava-v1.5-7b"
(
    llva_tokenizer,
    llva_model,
    llva_image_processor,
    llva_context_len,
) = load_pretrained_model(
    model_path=llva_model_path,
    model_base=None,
    model_name=get_model_name_from_path(llva_model_path),
)
# Only the model itself is moved; the tokenizer and image processor are left
# where load_pretrained_model put them.
llva_model.to(device)

# --- ConsistentID weights ---
# The downloaded file path is split into directory + file name because the
# loader takes them as separate arguments.
_consistentID_dir, _consistentID_file = os.path.split(consistentID_path)
pipe.load_ConsistentID_model(
    _consistentID_dir,
    bise_net,
    subfolder="",
    weight_name=_consistentID_file,
    trigger_word="img",
)
pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)

# --- Move the pipeline and its ConsistentID sub-modules to `device` ---
pipe.to(device)
pipe.image_encoder.to(device)
pipe.image_proj_model.to(device)
pipe.FacialEncoder.to(device)
77
+
78
+
79
+
80
+ @spaces.GPU
81
+ def process(selected_template_images,costum_image,prompt
82
+ ,negative_prompt,prompt_selected,retouching,model_selected_tab,prompt_selected_tab,width,height,merge_steps,seed_set):
83
+
84
+ if model_selected_tab==0:
85
+ select_images = load_image(Image.open(selected_template_images))
86
+ else:
87
+ select_images = load_image(Image.fromarray(costum_image))
88
+
89
+ if prompt_selected_tab==0:
90
+ prompt = prompt_selected
91
+ negative_prompt = ""
92
+ need_safetycheck = False
93
+ else:
94
+ need_safetycheck = True
95
+
96
+ # hyper-parameter
97
+ num_steps = 50
98
+ seed_set = torch.randint(0, 1000, (1,)).item()
99
+ # merge_steps = 30
100
+
101
+ @torch.inference_mode()
102
+ def Enhance_prompt(prompt,select_images):
103
+
104
+ llva_prompt = f'Please ignore the image. Enhance the following text prompt for me. You can associate more details with the character\'s gesture, environment, and decent clothing:"{prompt}".'
105
+ args = type('Args', (), {
106
+ "model_path": llva_model_path,
107
+ "model_base": None,
108
+ "model_name": get_model_name_from_path(llva_model_path),
109
+ "query": llva_prompt,
110
+ "conv_mode": None,
111
+ "image_file": select_images,
112
+ "sep": ",",
113
+ "temperature": 0,
114
+ "top_p": None,
115
+ "num_beams": 1,
116
+ "max_new_tokens": 512
117
+ })()
118
+ Enhanced_prompt = eval_model(args, llva_tokenizer, llva_model, llva_image_processor)
119
+
120
+ return Enhanced_prompt
121
+
122
+ if prompt == "":
123
+ prompt = "A man, in a forest"
124
+ prompt = "A man, with backpack, in a raining tropical forest, adventuring, holding a flashlight, in mist, seeking animals"
125
+ prompt = "A person, in a sowm, wearing santa hat and a scarf, with a cottage behind"
126
+ else:
127
+ prompt=Enhance_prompt(prompt,Image.new('RGB', (200, 200), color = 'white'))
128
+ print(prompt)
129
+ pass
130
+
131
+ if negative_prompt == "":
132
+ negative_prompt = "monochrome, lowres, bad anatomy, worst quality, low quality, blurry"
133
+
134
+ #Extend Prompt
135
+ prompt = "cinematic photo," + prompt + ", 50mm photograph, half-length portrait, film, bokeh, professional, 4k, highly detailed"
136
+
137
+ negtive_prompt_group="((cross-eye)),((cross-eyed)),(((NFSW))),(nipple),((((ugly)))), (((duplicate))), ((morbid)), ((mutilated)), [out of frame], extra fingers, mutated hands, ((poorly drawn hands)), ((poorly drawn face)), (((mutation))), (((deformed))), ((ugly)), blurry, ((bad anatomy)), (((bad proportions))), ((extra limbs)), cloned face, (((disfigured))). out of frame, ugly, extra limbs, (bad anatomy), gross proportions, (malformed limbs), ((missing arms)), ((missing legs)), (((extra arms))), (((extra legs))), mutated hands, (fused fingers), (too many fingers), (((long neck)))"
138
+ negative_prompt = negative_prompt + negtive_prompt_group
139
+
140
+ # seed = torch.randint(0, 1000, (1,)).item()
141
+ generator = torch.Generator(device=device).manual_seed(seed_set)
142
+
143
+ images = pipe(
144
  prompt=prompt,
145
+ width=width,
 
 
 
146
  height=height,
147
+ input_id_images=select_images,
148
+ input_image_path=input_image_path,
149
+ negative_prompt=negative_prompt,
150
+ num_images_per_prompt=1,
151
+ num_inference_steps=num_steps,
152
+ start_merge_step=merge_steps,
153
  generator=generator,
154
+ retouching=retouching,
155
+ need_safetycheck=need_safetycheck,
156
  ).images[0]
157
 
158
+ current_date = datetime.today()
159
+ return np.array(images)
160
+
161
# Collect the preset reference images: PNG files first, then JPG files.
script_directory = os.path.dirname(os.path.realpath(__file__))
preset_template = [
    *glob.glob("./images/templates/*.png"),
    *glob.glob("./images/templates/*.jpg"),
]

# Entries offered by the "template prompts" dropdown.
_PREPARED_PROMPTS = [
    "A woman in a wedding dress",
    "A woman, queen, in a gorgeous palace",
    "A man sitting at the beach with sunset",
    "A person, police officer, half body shot",
    "A man, sailor, in a boat above ocean",
    "A women wearing headphone, listening music",
    "A man, firefighter, half body shot",
]


def select_function(evt: gr.SelectData):
    """Return the template path at the gallery position that was clicked."""
    return preset_template[evt.index]


def _track_selected_tab(tabs, state):
    """Make selecting the n-th tab in *tabs* write n into *state*."""
    for position, tab_item in enumerate(tabs):
        # Bind the position as a default so each handler keeps its own value.
        tab_item.select(fn=lambda tabnum=position: tabnum, inputs=[], outputs=[state])


def _limit_height(x, y):
    """New height after the width slider is released."""
    return min(1280 - x, y)


def _limit_width(x, y):
    """New width after the height slider is released."""
    return min(1280 - y, x)


with gr.Blocks(title="ConsistentID_SDXL Demo") as demo:
    gr.Markdown("# ConsistentID_SDXL Demo")
    gr.Markdown(
        "Put the reference figure to be redrawn into the box below (There is a small probability of referensing failure. You can submit it repeatedly)"
    )
    gr.Markdown(
        "If you find our work interesting, please leave a star in GitHub for us!<br>"
        " https://github.com/JackAILab/ConsistentID"
    )
    with gr.Row():
        # Left column: reference image, either a template or an upload.
        with gr.Column():
            model_selected_tab = gr.State(0)
            with gr.TabItem("template images") as template_images_tab:
                template_gallery_list = [(path, path) for path in preset_template]
                gallery = gr.Gallery(
                    template_gallery_list,
                    columns=[4],
                    rows=[2],
                    object_fit="contain",
                    height="auto",
                    show_label=False,
                )
                # Hidden text field that carries the selected template path.
                selected_template_images = gr.Text(
                    show_label=False, visible=False, placeholder="Selected"
                )
                gallery.select(select_function, None, selected_template_images)
            with gr.TabItem("Upload Image") as upload_image_tab:
                costum_image = gr.Image(label="Upload Image")

            model_selected_tabs = [template_images_tab, upload_image_tab]
            _track_selected_tab(model_selected_tabs, model_selected_tab)

        # Middle column: prompt choice and generation settings.
        with gr.Column():
            prompt_selected_tab = gr.State(0)
            with gr.TabItem("template prompts") as template_prompts_tab:
                prompt_selected = gr.Dropdown(
                    value="A person, police officer, half body shot",
                    elem_id='dropdown',
                    choices=_PREPARED_PROMPTS,
                    label="prepared prompts",
                )

            with gr.TabItem("custom prompt") as custom_prompt_tab:
                prompt = gr.Textbox(
                    label="prompt",
                    placeholder="A man/woman wearing a santa hat",
                )
                nagetive_prompt = gr.Textbox(
                    label="negative prompt",
                    placeholder="monochrome, lowres, bad anatomy, worst quality, low quality, blurry",
                )

            prompt_selected_tabs = [template_prompts_tab, custom_prompt_tab]
            _track_selected_tab(prompt_selected_tabs, prompt_selected_tab)

            retouching = gr.Checkbox(label="face retouching", value=False, visible=False)
            width = gr.Slider(label="image width", minimum=512, maximum=1280, value=864, step=8)
            height = gr.Slider(label="image height", minimum=512, maximum=1280, value=1152, step=8)
            # Releasing one size slider recomputes the other one.
            width.release(_limit_height, inputs=[width, height], outputs=[height])
            height.release(_limit_width, inputs=[width, height], outputs=[width])
            merge_steps = gr.Slider(
                label="step starting to merge facial details(30 is recommended)",
                minimum=10,
                maximum=50,
                value=30,
                step=1,
            )
            seed_set = gr.Slider(
                label="set the random seed for different results",
                minimum=1,
                maximum=2147483647,
                value=2024,
                step=1,
            )

            btn = gr.Button("Run")

        # Right column: result and usage notes.
        with gr.Column():
            out = gr.Image(label="Output")
            gr.Markdown('''
            N.B.:<br/>
            - If the proportion of face in the image is too small, the probability of an error will be slightly higher, and the similarity will also significantly decrease.)
            - At the same time, use prompt with \"man\" or \"woman\" instead of \"person\" as much as possible, as that may cause the model to be confused whether the protagonist is male or female.
            - Due to insufficient graphics memory on the demo server, there is an upper limit on the resolution for generating samples. We will support the generation of SDXL as soon as possible<br/><br/>
            ''')

        # The input order must match the parameter order of `process`.
        btn.click(
            fn=process,
            inputs=[
                selected_template_images,
                costum_image,
                prompt,
                nagetive_prompt,
                prompt_selected,
                retouching,
                model_selected_tab,
                prompt_selected_tab,
                width,
                height,
                merge_steps,
                seed_set,
            ],
            outputs=out,
        )

demo.launch()