multimodalart HF staff committed on
Commit
a47c17f
1 Parent(s): 74e4319

Add v2-768

Browse files
Files changed (2) hide show
  1. app.py +4 -5
  2. train_dreambooth.py +3 -3
app.py CHANGED
@@ -58,8 +58,8 @@ def swap_base_model(selected_model):
58
  global model_to_load
59
  if(selected_model == "v1-5"):
60
  model_to_load = model_v1
61
- #elif(selected_model == "v2-768"):
62
- # model_to_load = model_v2
63
  else:
64
  model_to_load = model_v2_512
65
 
@@ -171,8 +171,7 @@ def train(*inputs):
171
  Training_Steps=1400
172
 
173
  stptxt = int((Training_Steps*Train_text_encoder_for)/100)
174
- #gradient_checkpointing = False if which_model == "v1-5" else True
175
- gradient_checkpointing=False
176
  resolution = 512 if which_model != "v2-768" else 768
177
  cache_latents = True if which_model != "v1-5" else False
178
  if (type_of_thing == "object" or type_of_thing == "style" or (type_of_thing == "person" and not experimental_face_improvement)):
@@ -445,7 +444,7 @@ with gr.Blocks(css=css) as demo:
445
 
446
  with gr.Row() as what_are_you_training:
447
  type_of_thing = gr.Dropdown(label="What would you like to train?", choices=["object", "person", "style"], value="object", interactive=True)
448
- base_model_to_use = gr.Dropdown(label="Which base model would you like to use?", choices=["v1-5", "v2-512"], value="v1-5", interactive=True)
449
 
450
  #Very hacky approach to emulate dynamically created Gradio components
451
  with gr.Row() as upload_your_concept:
 
58
  global model_to_load
59
  if(selected_model == "v1-5"):
60
  model_to_load = model_v1
61
+ elif(selected_model == "v2-768"):
62
+ model_to_load = model_v2
63
  else:
64
  model_to_load = model_v2_512
65
 
 
171
  Training_Steps=1400
172
 
173
  stptxt = int((Training_Steps*Train_text_encoder_for)/100)
174
+ gradient_checkpointing = False if which_model == "v1-5" else True
 
175
  resolution = 512 if which_model != "v2-768" else 768
176
  cache_latents = True if which_model != "v1-5" else False
177
  if (type_of_thing == "object" or type_of_thing == "style" or (type_of_thing == "person" and not experimental_face_improvement)):
 
444
 
445
  with gr.Row() as what_are_you_training:
446
  type_of_thing = gr.Dropdown(label="What would you like to train?", choices=["object", "person", "style"], value="object", interactive=True)
447
+ base_model_to_use = gr.Dropdown(label="Which base model would you like to use?", choices=["v1-5", "v2-512", "v2-768"], value="v1-5", interactive=True)
448
 
449
  #Very hacky approach to emulate dynamically created Gradio components
450
  with gr.Row() as upload_your_concept:
train_dreambooth.py CHANGED
@@ -710,10 +710,10 @@ def run_training(args_imported):
710
  # Convert images to latent space
711
  with torch.no_grad():
712
  if args.cache_latents:
713
- latents = batch[0][0]
714
  else:
715
- latents = vae.encode(batch["pixel_values"].to(dtype=weight_dtype)).latent_dist.sample()
716
- latents = latents * 0.18215
717
 
718
  # Sample noise that we'll add to the latents
719
  noise = torch.randn_like(latents)
 
710
  # Convert images to latent space
711
  with torch.no_grad():
712
  if args.cache_latents:
713
+ latents_dist = batch[0][0]
714
  else:
715
+ latents_dist = vae.encode(batch["pixel_values"].to(dtype=weight_dtype)).latent_dist
716
+ latents = latents_dist.sample() * 0.18215
717
 
718
  # Sample noise that we'll add to the latents
719
  noise = torch.randn_like(latents)