adaface-neurips committed
Commit 13d8b07
1 Parent(s): 81f8834

minor changes

Files changed (2)
  1. adaface/adaface_wrapper.py +22 -7
  2. app.py +7 -5
adaface/adaface_wrapper.py CHANGED
@@ -84,7 +84,7 @@ class AdaFaceWrapper(nn.Module):
         if self.use_ds_text_encoder:
             # The dreamshaper v7 finetuned text encoder follows the prompt slightly better than the original text encoder.
             # https://huggingface.co/Lykon/DreamShaper/tree/main/text_encoder
-            text_encoder = CLIPTextModel.from_pretrained("models/ds_text_encoder", torch_dtype=torch.float16)
+            text_encoder = CLIPTextModel.from_pretrained("models/diffusers/ds_text_encoder", torch_dtype=torch.float16)
         else:
             text_encoder = None
 
@@ -253,10 +253,13 @@ class AdaFaceWrapper(nn.Module):
         self.update_text_encoder_subj_embs(adaface_subj_embs)
         return adaface_subj_embs
 
-    def encode_prompt(self, prompt, negative_prompt=None, device="cuda", verbose=False):
+    def encode_prompt(self, prompt, negative_prompt=None, device=None, verbose=False):
         if negative_prompt is None:
             negative_prompt = self.negative_prompt
-
+
+        if device is None:
+            device = self.device
+
         prompt = self.update_prompt(prompt)
         if verbose:
             print(f"Prompt: {prompt}")
@@ -264,10 +267,22 @@ class AdaFaceWrapper(nn.Module):
         # For some unknown reason, the text_encoder is still on CPU after self.pipeline.to(self.device).
         # So we manually move it to GPU here.
         self.pipeline.text_encoder.to(device)
-        # prompt_embeds_, negative_prompt_embeds_: [1, 77, 768]
-        prompt_embeds_, negative_prompt_embeds_ = \
-            self.pipeline.encode_prompt(prompt, device=device, num_images_per_prompt=1,
-                                        do_classifier_free_guidance=True, negative_prompt=negative_prompt)
+        # Compatible with older versions of diffusers.
+        if not hasattr(self.pipeline, "encode_prompt"):
+            # prompt_embeds_, negative_prompt_embeds_: [77, 768] -> [1, 77, 768].
+            prompt_embeds_, negative_prompt_embeds_ = \
+                self.pipeline._encode_prompt(prompt, device=device, num_images_per_prompt=1,
+                                             do_classifier_free_guidance=True, negative_prompt=negative_prompt)
+            prompt_embeds_ = prompt_embeds_.unsqueeze(0)
+            negative_prompt_embeds_ = negative_prompt_embeds_.unsqueeze(0)
+        else:
+            # prompt_embeds_, negative_prompt_embeds_: [1, 77, 768]
+            prompt_embeds_, negative_prompt_embeds_ = \
+                self.pipeline.encode_prompt(prompt, device=device,
+                                            num_images_per_prompt=1,
+                                            do_classifier_free_guidance=True,
+                                            negative_prompt=negative_prompt)
+
         return prompt_embeds_, negative_prompt_embeds_
 
     # ref_img_strength is used only in the img2img pipeline.
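Note on the second hunk: the `device=None` default replaces a hard-coded `"cuda"` with a fallback to the wrapper's own device. A minimal sketch of the pattern (the `Wrapper` class and dummy embedding below are hypothetical stand-ins, not AdaFace code):

```python
import torch
import torch.nn as nn

class Wrapper(nn.Module):
    """Hypothetical stand-in for AdaFaceWrapper."""
    def __init__(self, device="cpu"):
        super().__init__()
        self.device = torch.device(device)

    def encode_prompt(self, prompt, device=None):
        # Fall back to the configured device instead of assuming "cuda",
        # so the same code path also works on CPU-only hosts.
        if device is None:
            device = self.device
        # Dummy [1, 77, 768] embedding standing in for real text encoding.
        return torch.zeros(1, 77, 768, device=device)

print(Wrapper().encode_prompt("a portrait photo").shape)  # torch.Size([1, 77, 768])
```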
 
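Note on the third hunk: the `hasattr` branch keeps the wrapper working across diffusers releases, since older versions only expose the private `_encode_prompt` while newer ones expose the public `encode_prompt`. A sketch of the probing pattern, assuming a loaded `StableDiffusionPipeline` (`pipe`); exact return shapes vary by diffusers version, and the `unsqueeze(0)` mirrors the commit's comment that the old API yields unbatched `[77, 768]` tensors:

```python
import torch

def encode_prompt_compat(pipe, prompt, negative_prompt, device):
    """Encode `prompt` on both old and new diffusers APIs (sketch)."""
    if not hasattr(pipe, "encode_prompt"):
        # Older diffusers: only the private helper exists. Per the commit,
        # it returns unbatched [77, 768] embeddings, so add a batch dim.
        pos, neg = pipe._encode_prompt(
            prompt, device=device, num_images_per_prompt=1,
            do_classifier_free_guidance=True, negative_prompt=negative_prompt)
        pos, neg = pos.unsqueeze(0), neg.unsqueeze(0)
    else:
        # Newer diffusers: the public API already returns [1, 77, 768].
        pos, neg = pipe.encode_prompt(
            prompt, device=device, num_images_per_prompt=1,
            do_classifier_free_guidance=True, negative_prompt=negative_prompt)
    return pos, neg
```

Probing with `hasattr` rather than pinning a diffusers version keeps the Space deployable against whatever version the runtime image ships.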
app.py CHANGED
@@ -88,7 +88,8 @@ def gen_init_images(uploaded_image_paths, prompt, adaface_id_cfg_scale, out_imag
     # Generate two images each time for the user to select from.
     noise = torch.randn(out_image_count, 3, 512, 512)
     # samples: A list of PIL Image instances.
-    samples = adaface(noise, prompt, out_image_count=out_image_count, verbose=True)
+    with torch.no_grad():
+        samples = adaface(noise, prompt, out_image_count=out_image_count, verbose=True)
 
     face_paths = []
     for sample in samples:
@@ -130,10 +131,11 @@ def generate_image(image_container, uploaded_image_paths, init_img_file_paths, i
     # Reload the embedding manager
     adaface.load_subj_basis_generator(adaface_ckpt_path)
 
-    adaface.generate_adaface_embeddings(image_folder=None, image_paths=uploaded_image_paths,
-                                        out_id_embs_scale=adaface_id_cfg_scale, update_text_encoder=True)
-    # adaface_prompt_embeds: [1, 77, 768].
-    adaface_prompt_embeds, _ = adaface.encode_prompt(prompt)
+    with torch.no_grad():
+        adaface.generate_adaface_embeddings(image_folder=None, image_paths=uploaded_image_paths,
+                                            out_id_embs_scale=adaface_id_cfg_scale, update_text_encoder=True)
+        # adaface_prompt_embeds: [1, 77, 768].
+        adaface_prompt_embeds, _ = adaface.encode_prompt(prompt)
 
     # init_img_file_paths is a list of image paths. If not chosen, init_img_file_paths is None.
     if init_img_file_paths is not None:
 
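Note on the app.py hunks: both wrap pure-inference calls in `torch.no_grad()`. The Gradio handlers never backpropagate, so recording the autograd graph would only hold onto activations and waste memory. A minimal illustration with a hypothetical model:

```python
import torch

model = torch.nn.Linear(768, 768)
x = torch.randn(1, 768)

y_grad = model(x)            # records a graph for backprop
with torch.no_grad():
    y_nograd = model(x)      # records nothing; activations are freed eagerly

print(y_grad.requires_grad)    # True
print(y_nograd.requires_grad)  # False
```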