JackAILab committed on
Commit
4979db1
·
verified ·
1 Parent(s): 0d19657

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +92 -75
app.py CHANGED
@@ -19,7 +19,7 @@ from models.BiSeNet.model import BiSeNet
19
  # zero = torch.Tensor([0]).cuda()
20
  # print(zero.device) # <-- 'cpu' 🤔
21
  # device = zero.device # "cuda"
22
- device = "cuda"
23
 
24
  # Gets the absolute path of the current script
25
  script_directory = os.path.dirname(os.path.realpath(__file__))
@@ -34,14 +34,14 @@ pipe = ConsistentIDStableDiffusionPipeline.from_pretrained(
34
  torch_dtype=torch.float16,
35
  safety_checker=None, # use_safetensors=True,
36
  # variant="fp16"
37
- ).to(device)
38
 
39
  ### Load other pretrained models
40
  ## BiSenet
41
  bise_net_cp_path = hf_hub_download(repo_id="JackAILab/ConsistentID", filename="face_parsing.pth", local_dir="./checkpoints")
42
  bise_net = BiSeNet(n_classes = 19)
43
  bise_net.load_state_dict(torch.load(bise_net_cp_path, map_location="cpu")) # device fail
44
- bise_net.cuda()
45
 
46
  import sys
47
  sys.path.append("./models/LLaVA")
@@ -77,85 +77,102 @@ pipe.FacialEncoder.to(device)
77
 
78
 
79
 
80
- @spaces.GPU
81
  def process(selected_template_images,costum_image,prompt
82
  ,negative_prompt,prompt_selected,retouching,model_selected_tab,prompt_selected_tab,width,height,merge_steps,seed_set):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
- if model_selected_tab==0:
85
- select_images = load_image(Image.open(selected_template_images))
86
- else:
87
- select_images = load_image(Image.fromarray(costum_image))
88
-
89
- if prompt_selected_tab==0:
90
- prompt = prompt_selected
91
- negative_prompt = ""
92
- need_safetycheck = False
93
- else:
94
- need_safetycheck = True
95
-
96
- # hyper-parameter
97
- num_steps = 50
98
- seed_set = torch.randint(0, 1000, (1,)).item()
99
- # merge_steps = 30
100
 
101
- @torch.inference_mode()
102
- def Enhance_prompt(prompt,select_images):
 
 
 
 
 
 
 
 
 
 
 
 
 
103
 
104
- llva_prompt = f'Please ignore the image. Enhance the following text prompt for me. You can associate more details with the character\'s gesture, environment, and decent clothing:"{prompt}".'
105
- # args = type('Args', (), {
106
- # "model_path": llva_model_path,
107
- # "model_base": None,
108
- # "model_name": get_model_name_from_path(llva_model_path),
109
- # "query": llva_prompt,
110
- # "conv_mode": None,
111
- # "image_file": select_images,
112
- # "sep": ",",
113
- # "temperature": 0,
114
- # "top_p": None,
115
- # "num_beams": 1,
116
- # "max_new_tokens": 512
117
- # })()
118
- # Enhanced_prompt = eval_model(args, llva_tokenizer, llva_model, llva_image_processor)
119
 
120
- return Enhanced_prompt
121
-
122
- if prompt == "":
123
- prompt = "A man, in a forest"
124
- prompt = "A man, with backpack, in a raining tropical forest, adventuring, holding a flashlight, in mist, seeking animals"
125
- prompt = "A person, in a sowm, wearing santa hat and a scarf, with a cottage behind"
126
- else:
127
- # prompt=Enhance_prompt(prompt,Image.new('RGB', (200, 200), color = 'white'))
128
- print(prompt)
129
- pass
130
-
131
- if negative_prompt == "":
132
- negative_prompt = "monochrome, lowres, bad anatomy, worst quality, low quality, blurry"
133
-
134
- #Extend Prompt
135
- prompt = "cinematic photo," + prompt + ", 50mm photograph, half-length portrait, film, bokeh, professional, 4k, highly detailed"
136
-
137
- negtive_prompt_group="((cross-eye)),((cross-eyed)),(((NFSW))),(nipple),((((ugly)))), (((duplicate))), ((morbid)), ((mutilated)), [out of frame], extra fingers, mutated hands, ((poorly drawn hands)), ((poorly drawn face)), (((mutation))), (((deformed))), ((ugly)), blurry, ((bad anatomy)), (((bad proportions))), ((extra limbs)), cloned face, (((disfigured))). out of frame, ugly, extra limbs, (bad anatomy), gross proportions, (malformed limbs), ((missing arms)), ((missing legs)), (((extra arms))), (((extra legs))), mutated hands, (fused fingers), (too many fingers), (((long neck)))"
138
- negative_prompt = negative_prompt + negtive_prompt_group
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
 
140
- # seed = torch.randint(0, 1000, (1,)).item()
141
- generator = torch.Generator(device=device).manual_seed(seed_set)
142
-
143
- images = pipe(
144
- prompt=prompt,
145
- width=width,
146
- height=height,
147
- input_id_images=select_images,
148
- negative_prompt=negative_prompt,
149
- num_images_per_prompt=1,
150
- num_inference_steps=num_steps,
151
- start_merge_step=merge_steps,
152
- generator=generator,
153
- retouching=retouching,
154
- need_safetycheck=need_safetycheck,
155
- ).images[0]
156
-
157
- current_date = datetime.today()
158
- return np.array(images)
159
 
160
  # Gets the templates
161
  script_directory = os.path.dirname(os.path.realpath(__file__))
 
19
  # zero = torch.Tensor([0]).cuda()
20
  # print(zero.device) # <-- 'cpu' 🤔
21
  # device = zero.device # "cuda"
22
+ device = "cpu"
23
 
24
  # Gets the absolute path of the current script
25
  script_directory = os.path.dirname(os.path.realpath(__file__))
 
34
  torch_dtype=torch.float16,
35
  safety_checker=None, # use_safetensors=True,
36
  # variant="fp16"
37
+ )
38
 
39
  ### Load other pretrained models
40
  ## BiSenet
41
  bise_net_cp_path = hf_hub_download(repo_id="JackAILab/ConsistentID", filename="face_parsing.pth", local_dir="./checkpoints")
42
  bise_net = BiSeNet(n_classes = 19)
43
  bise_net.load_state_dict(torch.load(bise_net_cp_path, map_location="cpu")) # device fail
44
+ # bise_net.cuda()
45
 
46
  import sys
47
  sys.path.append("./models/LLaVA")
 
77
 
78
 
79
 
80
def _move_pipeline_modules(target_device):
    """Move the pipeline and the sub-modules this app tracks to `target_device`."""
    for module in (
        pipe,
        pipe.image_encoder,
        pipe.image_proj_model,
        pipe.FacialEncoder,
        pipe.bise_net,
    ):
        module.to(target_device)


@spaces.GPU(duration=120)
def process(selected_template_images, costum_image, prompt,
            negative_prompt, prompt_selected, retouching, model_selected_tab,
            prompt_selected_tab, width, height, merge_steps, seed_set):
    """Run the ConsistentID pipeline once and return the image as a NumPy array.

    The models are moved to the GPU for the duration of the call and are
    always moved back to the CPU afterwards, even when generation fails.

    Args:
        selected_template_images: Image source opened with ``Image.open``;
            used when ``model_selected_tab == 0``.
        costum_image: Array converted with ``Image.fromarray``; used when
            ``model_selected_tab != 0``.
        prompt: Free-text prompt; used when ``prompt_selected_tab != 0``.
        negative_prompt: Free-text negative prompt; discarded (reset to "")
            when ``prompt_selected_tab == 0``.
        prompt_selected: Preset prompt; used when ``prompt_selected_tab == 0``.
        retouching: Forwarded unchanged to the pipeline.
        model_selected_tab: Selects the template (0) or the custom image.
        prompt_selected_tab: Selects the preset (0) or the free-text prompt.
        width: Forwarded unchanged to the pipeline.
        height: Forwarded unchanged to the pipeline.
        merge_steps: Forwarded to the pipeline as ``start_merge_step``.
        seed_set: Currently ignored; a random seed is drawn on every call.

    Returns:
        ``np.array`` of the first image produced by the pipeline.
    """
    inference_device = "cuda"

    try:
        # Inside the try so that a failure halfway through the transfer
        # (e.g. out of memory) still triggers the offload in `finally`.
        _move_pipeline_modules(inference_device)

        if model_selected_tab == 0:
            select_images = load_image(Image.open(selected_template_images))
        else:
            select_images = load_image(Image.fromarray(costum_image))

        if prompt_selected_tab == 0:
            # Preset prompts bypass the safety check and any negative prompt.
            prompt = prompt_selected
            negative_prompt = ""
            need_safetycheck = False
        else:
            need_safetycheck = True

        # Hyper-parameters
        num_steps = 50
        # NOTE(review): this overwrites the caller-supplied `seed_set` with a
        # random value in [0, 1000) — confirm that ignoring the argument is
        # intended.
        seed_set = torch.randint(0, 1000, (1,)).item()

        # LLaVA-based prompt enhancement is disabled; the prompt is used as given.
        if prompt == "":
            # NOTE(review): "sowm" looks like a typo for "snow" — confirm
            # before changing the text sent to the model.
            prompt = "A person, in a sowm, wearing santa hat and a scarf, with a cottage behind"
        else:
            print(prompt)

        if negative_prompt == "":
            negative_prompt = "monochrome, lowres, bad anatomy, worst quality, low quality, blurry"

        # Extend the prompt with fixed photographic style keywords.
        prompt = "cinematic photo," + prompt + ", 50mm photograph, half-length portrait, film, bokeh, professional, 4k, highly detailed"

        # NOTE(review): "NFSW" below looks like a typo for "NSFW" — confirm
        # before changing the text sent to the model.
        negative_prompt_group = "((cross-eye)),((cross-eyed)),(((NFSW))),(nipple),((((ugly)))), (((duplicate))), ((morbid)), ((mutilated)), [out of frame], extra fingers, mutated hands, ((poorly drawn hands)), ((poorly drawn face)), (((mutation))), (((deformed))), ((ugly)), blurry, ((bad anatomy)), (((bad proportions))), ((extra limbs)), cloned face, (((disfigured))). out of frame, ugly, extra limbs, (bad anatomy), gross proportions, (malformed limbs), ((missing arms)), ((missing legs)), (((extra arms))), (((extra legs))), mutated hands, (fused fingers), (too many fingers), (((long neck)))"
        negative_prompt = negative_prompt + negative_prompt_group

        # The generator is created on the module-level `device`, not on
        # `inference_device`.
        generator = torch.Generator(device=device).manual_seed(seed_set)

        image = pipe(
            prompt=prompt,
            width=width,
            height=height,
            input_id_images=select_images,
            negative_prompt=negative_prompt,
            num_images_per_prompt=1,
            num_inference_steps=num_steps,
            start_merge_step=merge_steps,
            generator=generator,
            retouching=retouching,
            need_safetycheck=need_safetycheck,
        ).images[0]

        return np.array(image)
    finally:
        # Release the GPU once inference has finished (or failed).
        _move_pipeline_modules("cpu")
        torch.cuda.empty_cache()
 
 
 
 
 
 
 
 
 
176
 
177
  # Gets the templates
178
  script_directory = os.path.dirname(os.path.realpath(__file__))