adaface-neurips committed
Commit b0b5a77 (1 parent: 0b38fab)

add link to adaface, various improvements

Files changed (4):
  1. adaface/adaface_wrapper.py +15 -7
  2. adaface/util.py +7 -6
  3. app.py +14 -5
  4. infer.py +2 -1
adaface/adaface_wrapper.py CHANGED
@@ -12,6 +12,8 @@ from insightface.app import FaceAnalysis
 from adaface.arc2face_models import CLIPTextModelWrapper
 from adaface.util import get_arc2face_id_prompt_embs
 import re, os
+import sys
+sys.modules['ldm'] = sys.modules['adaface']
 
 class AdaFaceWrapper(nn.Module):
     def __init__(self, pipeline_name, base_model_path, adaface_ckpt_path, device,
@@ -216,7 +218,7 @@ class AdaFaceWrapper(nn.Module):
         # NOTE: Since return_core_id_embs is True, id_prompt_emb is only the 16 core ID embeddings.
         # arc2face prompt template: "photo of a id person"
         # ID embeddings start from "id person ...". So there are 3 template tokens before the 16 ID embeddings.
-        faceid_embeds, id_prompt_emb \
+        face_image_count, faceid_embeds, id_prompt_emb \
             = get_arc2face_id_prompt_embs(self.face_app, self.pipeline.tokenizer, self.arc2face_text_encoder,
                                           extract_faceid_embeds=not gen_rand_face,
                                           pre_face_embs=pre_face_embs,
@@ -235,6 +237,9 @@ class AdaFaceWrapper(nn.Module):
                                           gen_neg_prompt=False,
                                           verbose=True)
 
+        if face_image_count == 0:
+            return None
+
         # adaface_subj_embs: [1, 1, 16, 768].
         # adaface_prompt_embs: [1, 77, 768] (not used).
         adaface_subj_embs, adaface_prompt_embs = \
@@ -248,7 +253,7 @@ class AdaFaceWrapper(nn.Module):
         self.update_text_encoder_subj_embs(adaface_subj_embs)
         return adaface_subj_embs
 
-    def encode_prompt(self, prompt, device="cuda", verbose=False):
+    def encode_prompt(self, prompt, negative_prompt, device="cuda", verbose=False):
         prompt = self.update_prompt(prompt)
         if verbose:
             print(f"Prompt: {prompt}")
@@ -259,14 +264,16 @@ class AdaFaceWrapper(nn.Module):
         # prompt_embeds_, negative_prompt_embeds_: [1, 77, 768]
         prompt_embeds_, negative_prompt_embeds_ = \
             self.pipeline.encode_prompt(prompt, device=device, num_images_per_prompt=1,
-                                        do_classifier_free_guidance=True, negative_prompt=self.negative_prompt)
+                                        do_classifier_free_guidance=True, negative_prompt=negative_prompt)
         return prompt_embeds_, negative_prompt_embeds_
 
     # ref_img_strength is used only in the img2img pipeline.
-    def forward(self, noise, prompt, guidance_scale=4.0, out_image_count=4, ref_img_strength=0.8, verbose=False):
+    def forward(self, noise, prompt, negative_prompt=None, guidance_scale=4.0,
+                out_image_count=4, ref_img_strength=0.8, generator=None, verbose=False):
+        if negative_prompt is None:
+            negative_prompt = self.negative_prompt
         # prompt_embeds_, negative_prompt_embeds_: [1, 77, 768]
-        prompt_embeds_, negative_prompt_embeds_ = self.encode_prompt(prompt, device=self.device, verbose=verbose)
-
+        prompt_embeds_, negative_prompt_embeds_ = self.encode_prompt(prompt, negative_prompt, device=self.device, verbose=verbose)
         # Repeat the prompt embeddings for all images in the batch.
         prompt_embeds_ = prompt_embeds_.repeat(out_image_count, 1, 1)
         negative_prompt_embeds_ = negative_prompt_embeds_.repeat(out_image_count, 1, 1)
@@ -280,7 +287,8 @@ class AdaFaceWrapper(nn.Module):
                              num_inference_steps=self.num_inference_steps,
                              guidance_scale=guidance_scale,
                              num_images_per_prompt=1,
-                             strength=ref_img_strength).images
+                             strength=ref_img_strength,
+                             generator=generator).images
         # images: [BS, 3, 512, 512]
         return images
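Two notes on this file's changes. The new `sys.modules['ldm'] = sys.modules['adaface']` line aliases the `adaface` package under the name `ldm`; the commit does not state why, but this pattern is typically used so that `torch.load` can unpickle checkpoints whose objects were serialized under `ldm.*` module paths. The other changes thread a per-call `negative_prompt` override and a `generator` through `forward`, and make embedding generation return `None` when no face is found. Below is a minimal usage sketch of the updated API; the pipeline name, model paths, and prompts are hypothetical placeholders, and only behavior visible in this diff is assumed.

```python
import torch
from adaface.adaface_wrapper import AdaFaceWrapper

# Constructor arguments follow the signature shown above; the concrete values
# here are hypothetical placeholders.
adaface = AdaFaceWrapper("text2img", "models/sar", "models/adaface.ckpt", "cuda")

# generate_adaface_embeddings() now returns None when no face is detected,
# so callers must check the result instead of assuming success.
subj_embs = adaface.generate_adaface_embeddings(
    image_folder=None, image_paths=["subject1.jpg"], update_text_encoder=True)
if subj_embs is None:
    raise ValueError("No face detected in the reference images")

# negative_prompt can now be overridden per call (it falls back to
# self.negative_prompt), and generator makes sampling reproducible.
g = torch.Generator(device="cuda").manual_seed(42)
noise = torch.randn(4, 3, 512, 512)
images = adaface(noise, "portrait of a person, studio lighting",
                 negative_prompt="blurry, low quality",
                 guidance_scale=4.0, out_image_count=4, generator=g)
```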
adaface/util.py CHANGED
@@ -246,8 +246,9 @@ def get_arc2face_id_prompt_embs(face_app, clip_tokenizer, arc2face_text_encoder,
                                 input_max_length=77, noise_level=0.0,
                                 return_core_id_embs=False,
                                 gen_neg_prompt=False, verbose=False):
+    face_image_count = 0
+
     if extract_faceid_embeds:
-        image_count = 0
         faceid_embeds = []
         if image_paths is not None:
             images_np = []
@@ -275,13 +276,13 @@ def get_arc2face_id_prompt_embs(face_app, clip_tokenizer, arc2face_text_encoder,
             face_info = sorted(face_infos, key=lambda x:(x['bbox'][2]-x['bbox'][0])*x['bbox'][3]-x['bbox'][1])[-1]
             # Each faceid_embed: [1, 512]
             faceid_embeds.append(torch.from_numpy(face_info.normed_embedding).unsqueeze(0))
-            image_count += 1
+            face_image_count += 1
 
         if verbose:
             if image_folder is not None:
-                print(f"Extracted ID embeddings from {image_count} images in {image_folder}")
+                print(f"Extracted ID embeddings from {face_image_count} images in {image_folder}")
             else:
-                print(f"Extracted ID embeddings from {image_count} images")
+                print(f"Extracted ID embeddings from {face_image_count} images")
 
         if len(faceid_embeds) == 0:
             print("No face detected. Use a random face instead.")
@@ -335,7 +336,7 @@ def get_arc2face_id_prompt_embs(face_app, clip_tokenizer, arc2face_text_encoder,
 
         #if extract_faceid_embeds:
         #    arc2face_neg_prompt_emb = arc2face_neg_prompt_emb.repeat(id_batch_size, 1, 1)
-        return faceid_embeds, arc2face_pos_prompt_emb, arc2face_neg_prompt_emb
+        return face_image_count, faceid_embeds, arc2face_pos_prompt_emb, arc2face_neg_prompt_emb
     else:
-        return faceid_embeds, arc2face_pos_prompt_emb
+        return face_image_count, faceid_embeds, arc2face_pos_prompt_emb
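This change alters the return contract of `get_arc2face_id_prompt_embs`: every return path now prepends `face_image_count`, so all callers must unpack one extra value. The count matters because the function falls back to a random face when detection fails ("No face detected. Use a random face instead."); `face_image_count == 0` is what lets a caller refuse that fallback instead of silently generating a random identity. A sketch of the updated call-site contract, with unrelated parameters elided:

```python
# Keyword arguments are those visible in this diff; the model objects
# (face_app, clip_tokenizer, arc2face_text_encoder) are assumed to be
# initialized elsewhere. With gen_neg_prompt=False, three values are returned.
face_image_count, faceid_embeds, pos_prompt_emb = get_arc2face_id_prompt_embs(
    face_app, clip_tokenizer, arc2face_text_encoder,
    extract_faceid_embeds=True,
    image_paths=["subject1.jpg"],
    gen_neg_prompt=False, verbose=True)

if face_image_count == 0:
    # The embeddings above came from the random fallback face, not the user's
    # images; abort rather than generate a wrong identity.
    raise RuntimeError("No face detected in the input images")
```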
app.py CHANGED
@@ -76,8 +76,13 @@ def gen_init_images(uploaded_image_paths, prompt, adaface_id_cfg_scale, out_imag
     # [('/tmp/gradio/249981e66a7c665aaaf1c7eaeb24949af4366c88/jensen huang.jpg', None)]
     # Extract the file paths.
     uploaded_image_paths = [path[0] for path in uploaded_image_paths]
-    adaface.generate_adaface_embeddings(image_folder=None, image_paths=uploaded_image_paths,
-                                        out_id_embs_scale=adaface_id_cfg_scale, update_text_encoder=True)
+    adaface_subj_embs = \
+        adaface.generate_adaface_embeddings(image_folder=None, image_paths=uploaded_image_paths,
+                                            out_id_embs_scale=adaface_id_cfg_scale, update_text_encoder=True)
+
+    if adaface_subj_embs is None:
+        raise gr.Error(f"Failed to detect any faces! Please try with other images")
+
     # Generate two images each time for the user to select from.
     noise = torch.randn(out_image_count, 3, 512, 512)
     # samples: A list of PIL Image instances.
@@ -163,7 +168,7 @@ def generate_image(image_container, uploaded_image_paths, init_img_file_paths, i
         save_videos_grid(sample, save_sample_path)
     return save_sample_path
 
-def validate(prompt):
+def validate_prompt(prompt):
     if not prompt:
         raise gr.Error("Prompt cannot be blank")
 
@@ -229,7 +234,11 @@ with gr.Blocks(css=css) as demo:
         ❗️❗️❗️**Tips:**
         - You can upload one or more subject images for generating ID-specific video.
         - Try different parameter combinations for the best generation quality.
-        - Technical explanations and demo videos: [Readme](https://huggingface.co/spaces/adaface-neurips/adaface-animate/blob/main/README2.md).
+        - Usage explanations and demos: [Readme](https://huggingface.co/spaces/adaface-neurips/adaface-animate/blob/main/README2.md).
+        - AdaFace Text-to-Image: <a href="https://huggingface.co/spaces/adaface-neurips/adaface" style="display: inline-flex; align-items: center;">
+          AdaFace
+          <img src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-yellow" alt="Hugging Face Spaces" style="margin-left: 5px;">
+          </a>
         """
         )
 
@@ -401,7 +410,7 @@ with gr.Blocks(css=css) as demo:
                           outputs=[uploaded_init_img_gallery, init_img_files, init_clear_button_column])
     uploaded_init_img_gallery.select(fn=get_clicked_image, inputs=None, outputs=init_img_selected_idx)
 
-    submit.click(fn=validate,
+    submit.click(fn=validate_prompt,
                  inputs=[prompt],outputs=None).success(
         fn=randomize_seed_fn,
         inputs=[seed, randomize_seed],
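In `app.py`, the handler now surfaces the no-face case as a `gr.Error`, which Gradio displays to the user and which stops the rest of the event chain; `validate` is also renamed to the more descriptive `validate_prompt`. A self-contained sketch of the pattern (component names and handlers here are simplified stand-ins, not the app's real ones):

```python
import gradio as gr

def validate_prompt(prompt):
    # Raising gr.Error aborts the event and shows the message in the UI.
    if not prompt:
        raise gr.Error("Prompt cannot be blank")

with gr.Blocks() as demo:
    prompt = gr.Textbox(label="Prompt")
    submit = gr.Button("Generate")
    status = gr.Textbox(label="Status")
    # .success() handlers run only if the preceding handler did not raise.
    submit.click(fn=validate_prompt, inputs=[prompt], outputs=None).success(
        fn=lambda p: f"Generating for: {p}", inputs=[prompt], outputs=[status])

demo.launch()
```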
infer.py CHANGED
@@ -64,7 +64,8 @@ def load_model(base_model_type="sar", adaface_base_model_type="sar",
         # scheduler=DPMSolverMultistepScheduler(**OmegaConf.to_container(inference_config.DPMSolver_scheduler_kwargs)
         # scheduler=EulerAncestralDiscreteScheduler(**OmegaConf.to_container(inference_config.noise_scheduler_kwargs)
         # scheduler=EulerAncestralDiscreteScheduler(beta_start=0.00085, beta_end=0.012, beta_schedule="linear",steps_offset=1
-        ),torch_dtype=torch.float16,
+        ),
+        torch_dtype=torch.float16,
     ).to(device=device)
 
     pipeline = load_weights(
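The `infer.py` change is purely cosmetic: `torch_dtype=torch.float16` had been glued onto the line that closes the scheduler argument, making it easy to misread as part of the scheduler call rather than as a sibling keyword argument of the pipeline constructor. An illustrative sketch of the disambiguated layout (the pipeline class and model path are hypothetical; the scheduler kwargs mirror the commented-out line above):

```python
import torch
from diffusers import StableDiffusionPipeline, EulerAncestralDiscreteScheduler

# One keyword argument per line makes it obvious that torch_dtype belongs to
# the pipeline constructor, not to the scheduler.
pipeline = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    scheduler=EulerAncestralDiscreteScheduler(
        beta_start=0.00085, beta_end=0.012,
        beta_schedule="linear", steps_offset=1),
    torch_dtype=torch.float16,
).to(device="cuda")
```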