sayakpaul committed
Commit 214abe6
1 Parent(s): 3bd4a93

fix: PT text encoder init.

Files changed (1)
convert.py  +12 -10
convert.py CHANGED
@@ -15,7 +15,7 @@ IMG_HEIGHT = IMG_WIDTH = 512
 MAX_SEQ_LENGTH = 77
 
 
-def initialize_pt_models():
+def initialize_pt_models(placeholder_token: str):
     """Initializes the separate models of Stable Diffusion from diffusers and downloads
     their pre-trained weights."""
     pt_text_encoder = CLIPTextModel.from_pretrained(
@@ -32,6 +32,16 @@ def initialize_pt_models():
         PRETRAINED_CKPT, subfolder="safety_checker", revision=NON_EMA_REVISION
     )
 
+    if placeholder_token is not None:
+        num_added_tokens = pt_tokenizer.add_tokens(placeholder_token)
+        if num_added_tokens == 0:
+            raise ValueError(
+                f"The tokenizer already contains the token {placeholder_token}. Please pass a different"
+                " `placeholder_token` that is not already in the tokenizer."
+            )
+        # Resize the token embeddings as we are adding new special tokens to the tokenizer
+        pt_text_encoder.resize_token_embeddings(len(pt_tokenizer))
+
     return pt_text_encoder, pt_tokenizer, pt_vae, pt_unet, pt_safety_checker
 
 
@@ -93,7 +103,7 @@
         pt_vae,
         pt_unet,
         pt_safety_checker,
-    ) = initialize_pt_models()
+    ) = initialize_pt_models(populate_text_encoder)
     tf_text_encoder, tf_unet, tf_tokenizer = initialize_tf_models(
         text_encoder_weights, unet_weights, placeholder_token
     )
@@ -103,14 +113,6 @@
         print("Initializing a new text encoder with the placeholder token...")
         tf_text_encoder = create_new_text_encoder(tf_text_encoder, tf_tokenizer)
 
-    print("Adding the placeholder token to PT CLIPTokenizer...")
-    num_added_tokens = pt_tokenizer.add_tokens(placeholder_token)
-    if num_added_tokens == 0:
-        raise ValueError(
-            f"The tokenizer already contains the token {placeholder_token}. Please pass a different"
-            " `placeholder_token` that is not already in the tokenizer."
-        )
-
     if text_encoder_weights is not None:
         print("Loading fine-tuned text encoder weights.")
         text_encoder_weights_path = tf.keras.utils.get_file(origin=text_encoder_weights)
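
Note on the change: the commit moves the placeholder-token handling from run_conversion into initialize_pt_models and, crucially, now calls resize_token_embeddings on the PT text encoder after the token is added; the old code added the token to the tokenizer only, leaving the embedding matrix one row short. Below is a minimal standalone sketch of that pattern using transformers. The checkpoint id and placeholder token are illustrative assumptions, not values taken from this repo.

from transformers import CLIPTextModel, CLIPTokenizer

# Assumed checkpoint, for illustration only.
PRETRAINED_CKPT = "CompVis/stable-diffusion-v1-4"

tokenizer = CLIPTokenizer.from_pretrained(PRETRAINED_CKPT, subfolder="tokenizer")
text_encoder = CLIPTextModel.from_pretrained(PRETRAINED_CKPT, subfolder="text_encoder")

placeholder_token = "<my-concept>"  # hypothetical textual-inversion token

# add_tokens returns the number of tokens actually added; 0 means it already existed.
num_added_tokens = tokenizer.add_tokens(placeholder_token)
if num_added_tokens == 0:
    raise ValueError(f"The tokenizer already contains the token {placeholder_token}.")

# Without this resize, the new token's id would index past the end of the
# encoder's original embedding matrix.
text_encoder.resize_token_embeddings(len(tokenizer))

# Sanity check: the embedding matrix now covers the enlarged vocabulary.
assert text_encoder.get_input_embeddings().weight.shape[0] == len(tokenizer)

Doing the resize inside initialize_pt_models keeps the tokenizer and text encoder consistent at construction time, instead of relying on every caller of run_conversion to remember the second step.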