Sreerama committed
Commit 1a8fb5a
Parent: 00bee52

update with mirage branding

Files changed (5)
  1. app.py +111 -146
  2. cat-toy-deprec.png +0 -0
  3. cat-toy.png +0 -0
  4. cattoy.png +0 -0
  5. mirage.png +0 -0
app.py CHANGED
@@ -54,7 +54,7 @@ def swap_text(option, base):
         show_prior_preservation=False
         if(show_prior_preservation):
             prior_preservation_box_update = gr.update(visible=show_prior_preservation)
-        else:
+        else:
             prior_preservation_box_update = gr.update(visible=show_prior_preservation, value=False)
         return [f"You are going to train a `person`(s), upload 10-20 images of each person you are planning on training on from different angles/perspectives. You can use services like <a style='text-decoration: underline' target='_blank' href='https://www.birme.net/?target_width={resize_width}&target_height={resize_width}'>birme</a> for smart cropping. {mandatory_liability}:", '''<img src="file/person.png" />''', f"You should name your concept with a unique made up word that has low chance of the model already knowing it (e.g.: `{instance_prompt_example}` here). Images will be automatically cropped to {resize_width}x{resize_width}.", freeze_for, prior_preservation_box_update]
     elif(option == "style"):
@@ -81,16 +81,13 @@ def count_files(*inputs):
             if(files):
                 concept_counter+=1
                 file_counter+=len(files)
-    uses_custom = inputs[-1]
-    type_of_thing = inputs[-4]
-    selected_model = inputs[-5]
-    experimental_faces = inputs[-6]
+    uses_custom = inputs[-1]
+    selected_model = inputs[-4]
+    experimental_faces = inputs[-5]
     if(uses_custom):
         Training_Steps = int(inputs[-3])
     else:
         Training_Steps = file_counter*150
-        if(type_of_thing == "person" and Training_Steps > 2400):
-            Training_Steps = 2400 #Avoid overfitting on person faces
     if(is_spaces):
         if(selected_model == "v1-5"):
             its = 1.1
@@ -102,12 +99,12 @@ def count_files(*inputs):
             its = 0.7
         elif(selected_model == "v2-768"):
             its = 0.5
-        summary_sentence = f'''You are going to train {concept_counter} {type_of_thing}(s), with {file_counter} images for {Training_Steps} steps. The training should take around {round(Training_Steps/its, 2)} seconds, or {round((Training_Steps/its)/60, 2)} minutes.
+        summary_sentence = f'''You are going to train {concept_counter}, with {file_counter} images for {Training_Steps} steps. The training should take around {round(Training_Steps/its, 2)} seconds, or {round((Training_Steps/its)/60, 2)} minutes.
         The setup, compression and uploading the model can take up to 20 minutes.<br>As the T4-Small GPU costs US$0.60 for 1h, <span style="font-size: 120%"><b>the estimated cost for this training is below US${round((((Training_Steps/its)/3600)+0.3+0.1)*0.60, 2)}.</b></span><br><br>
         If you check the box below the GPU attribution will automatically removed after training is done and the model is uploaded. If not, don't forget to come back here and swap the hardware back to CPU.<br><br>'''
     else:
-        summary_sentence = f'''You are going to train {concept_counter} {type_of_thing}(s), with {file_counter} images for {Training_Steps} steps.<br><br>'''
-
+        summary_sentence = f'''You are going to train {concept_counter}, with {file_counter} images for {Training_Steps} steps.<br><br>'''
+
     return([gr.update(visible=True), gr.update(visible=True, value=summary_sentence)])
 
 def update_steps(*files_list):
@@ -141,7 +138,7 @@ def train(*inputs):
         del pipe
         pipe_is_set = False
     gc.collect()
-
+
     if os.path.exists("output_model"): shutil.rmtree('output_model')
     if os.path.exists("instance_images"): shutil.rmtree('instance_images')
     if os.path.exists("diffusers_model.tar"): os.remove("diffusers_model.tar")
@@ -166,91 +163,51 @@ def train(*inputs):
                     image = image.convert('RGB')
                     image.save(f'instance_images/{prompt}_({j+1}).jpg', format="JPEG", quality = 100)
                     file_counter += 1
-
+
     os.makedirs('output_model',exist_ok=True)
-    uses_custom = inputs[-1]
-    type_of_thing = inputs[-4]
-    remove_attribution_after = inputs[-6]
-    experimental_face_improvement = inputs[-9]
-
+    uses_custom = inputs[-1]
+    remove_attribution_after = inputs[-5]
+    experimental_face_improvement = inputs[-8]
+
     if(uses_custom):
         Training_Steps = int(inputs[-3])
         Train_text_encoder_for = int(inputs[-2])
     else:
-        if(type_of_thing == "object"):
-            Train_text_encoder_for=30
-
-        elif(type_of_thing == "style"):
-            Train_text_encoder_for=15
-
-        elif(type_of_thing == "person"):
-            Train_text_encoder_for=70
-
+        Train_text_encoder_for=30
         Training_Steps = file_counter*150
-        if(type_of_thing == "person" and Training_Steps > 2600):
-            Training_Steps = 2600 #Avoid overfitting on people's faces
     stptxt = int((Training_Steps*Train_text_encoder_for)/100)
-    gradient_checkpointing = True if (experimental_face_improvement or which_model != "v1-5") else False
+    gradient_checkpointing = True if (experimental_face_improvement or which_model != "v1-5") else False
     cache_latents = True if which_model != "v1-5" else False
-    if (type_of_thing == "object" or type_of_thing == "style" or (type_of_thing == "person" and not experimental_face_improvement)):
-        args_general = argparse.Namespace(
-            image_captions_filename = True,
-            train_text_encoder = True if stptxt > 0 else False,
-            stop_text_encoder_training = stptxt,
-            save_n_steps = 0,
-            pretrained_model_name_or_path = model_to_load,
-            instance_data_dir="instance_images",
-            class_data_dir=None,
-            output_dir="output_model",
-            instance_prompt="",
-            seed=42,
-            resolution=resolution,
-            mixed_precision="fp16",
-            train_batch_size=1,
-            gradient_accumulation_steps=1,
-            use_8bit_adam=True,
-            learning_rate=2e-6,
-            lr_scheduler="polynomial",
-            lr_warmup_steps = 0,
-            max_train_steps=Training_Steps,
-            gradient_checkpointing=gradient_checkpointing,
-            cache_latents=cache_latents,
-        )
-        print("Starting single training...")
-        lock_file = open("intraining.lock", "w")
-        lock_file.close()
-        run_training(args_general)
-    else:
-        args_general = argparse.Namespace(
-            image_captions_filename = True,
-            train_text_encoder = True if stptxt > 0 else False,
-            stop_text_encoder_training = stptxt,
-            save_n_steps = 0,
-            pretrained_model_name_or_path = model_to_load,
-            instance_data_dir="instance_images",
-            class_data_dir="Mix",
-            output_dir="output_model",
-            with_prior_preservation=True,
-            prior_loss_weight=1.0,
-            instance_prompt="",
-            seed=42,
-            resolution=resolution,
-            mixed_precision="fp16",
-            train_batch_size=1,
-            gradient_accumulation_steps=1,
-            use_8bit_adam=True,
-            learning_rate=2e-6,
-            lr_scheduler="polynomial",
-            lr_warmup_steps = 0,
-            max_train_steps=Training_Steps,
-            num_class_images=200,
-            gradient_checkpointing=gradient_checkpointing,
-            cache_latents=cache_latents,
-        )
-        print("Starting multi-training...")
-        lock_file = open("intraining.lock", "w")
-        lock_file.close()
-        run_training(args_general)
+    args_general = argparse.Namespace(
+        image_captions_filename = True,
+        train_text_encoder = True if stptxt > 0 else False,
+        stop_text_encoder_training = stptxt,
+        save_n_steps = 0,
+        pretrained_model_name_or_path = model_to_load,
+        instance_data_dir="instance_images",
+        class_data_dir="Mix",
+        output_dir="output_model",
+        with_prior_preservation=True,
+        prior_loss_weight=1.0,
+        instance_prompt="",
+        seed=42,
+        resolution=resolution,
+        mixed_precision="fp16",
+        train_batch_size=1,
+        gradient_accumulation_steps=1,
+        use_8bit_adam=True,
+        learning_rate=2e-6,
+        lr_scheduler="polynomial",
+        lr_warmup_steps = 0,
+        max_train_steps=Training_Steps,
+        num_class_images=200,
+        gradient_checkpointing=gradient_checkpointing,
+        cache_latents=cache_latents,
+    )
+    print("Starting multi-training...")
+    lock_file = open("intraining.lock", "w")
+    lock_file.close()
+    run_training(args_general)
     gc.collect()
     torch.cuda.empty_cache()
     if(which_model == "v1-5"):
@@ -258,7 +215,7 @@ def train(*inputs):
         shutil.copytree(f"{safety_checker}/feature_extractor", "output_model/feature_extractor")
         shutil.copytree(f"{safety_checker}/safety_checker", "output_model/safety_checker")
         shutil.copy(f"model_index.json", "output_model/model_index.json")
-
+
     if(not remove_attribution_after):
         print("Archiving model file...")
         with tarfile.open("diffusers_model.tar", "w") as tar:
@@ -295,10 +252,10 @@ def generate(prompt, steps):
         pipe = StableDiffusionPipeline.from_pretrained("./output_model", torch_dtype=torch.float16)
         pipe = pipe.to("cuda")
         pipe_is_set = True
-
-    image = pipe(prompt, num_inference_steps=steps).images[0]
+
+    image = pipe(prompt, num_inference_steps=steps).images[0]
     return(image)
-
+
 def push(model_name, where_to_upload, hf_token, which_model, comes_from_automated=False):
     if(not os.path.exists("model.ckpt")):
         convert("output_model", "model.ckpt")
@@ -307,13 +264,13 @@ def push(model_name, where_to_upload, hf_token, which_model, comes_from_automate
     model_name_slug = slugify(model_name)
     api = HfApi()
     your_username = api.whoami(token=hf_token)["name"]
-    if(where_to_upload == "My personal profile"):
+    if(where_to_upload == "My personal profile"):
         model_id = f"{your_username}/{model_name_slug}"
     else:
         model_id = f"sd-dreambooth-library/{model_name_slug}"
         headers = {"Authorization" : f"Bearer: {hf_token}", "Content-Type": "application/json"}
         response = requests.post("https://huggingface.co/organizations/sd-dreambooth-library/share/SSeOwppVCscfTEzFGQaqpfcjukVeNrKNHX", headers=headers)
-
+
     images_upload = os.listdir("instance_images")
     image_string = ""
     instance_prompt_list = []
@@ -326,7 +283,7 @@ def push(model_name, where_to_upload, hf_token, which_model, comes_from_automate
         else:
             title_instance_prompt_string = ''
         previous_instance_prompt = instance_prompt
-        image_string = f'''{title_instance_prompt_string} {"(use that on your prompt)" if title_instance_prompt_string != "" else ""}
+        image_string = f'''{title_instance_prompt_string} {"(use that on your prompt)" if title_instance_prompt_string != "" else ""}
{image_string}![{instance_prompt} {i}](https://huggingface.co/{model_id}/resolve/main/concept_images/{urllib.parse.quote(image)})'''
     readme_text = f'''---
 license: creativeml-openrail-m
@@ -337,7 +294,7 @@ widget:
 ---
 ### {model_name} Dreambooth model trained by {api.whoami(token=hf_token)["name"]} with [Hugging Face Dreambooth Training Space](https://huggingface.co/spaces/multimodalart/dreambooth-training) with the {which_model} base model
 
-You run your new concept via `diffusers` [Colab Notebook for Inference](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/sd_dreambooth_inference.ipynb). Don't forget to use the concept prompts!
+You run your new concept via `diffusers` [Colab Notebook for Inference](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/sd_dreambooth_inference.ipynb). Don't forget to use the concept prompts!
 
 Sample pictures of:
 {image_string}
@@ -402,14 +359,14 @@ def check_status(top_description):
             update_top_tag = gr.update(value=f'''
             <div class="gr-prose" style="max-width: 80%">
             <h2>Your model has finished training ✅</h2>
-            <p>Yay, congratulations on training your model. Scroll down to play with with it, save it (either downloading it or on the Hugging Face Hub). Once you are done, your model is safe, and you don't want to train a new one, go to the <a href="https://huggingface.co/spaces/{os.environ['SPACE_ID']}" target="_blank">settings page</a> and downgrade your Space to a CPU Basic</p>
+            <p>Yay, congratulations on training your model. Scroll down to play with with it, save it (either downloading it or on the Hugging Face Hub). Once you are done, your model is safe, and you don't want to train a new one, go to the <a href="https://huggingface.co/spaces/{os.environ['SPACE_ID']}" target="_blank">settings page</a> and downgrade your Space to a CPU Basic</p>
             </div>
            ''')
         else:
             update_top_tag = gr.update(value=f'''
             <div class="gr-prose" style="max-width: 80%">
             <h2>Your model has finished training ✅</h2>
-            <p>Yay, congratulations on training your model. Scroll down to play with with it, save it (either downloading it or on the Hugging Face Hub).</p>
+            <p>Yay, congratulations on training your model. Scroll down to play with with it, save it (either downloading it or on the Hugging Face Hub).</p>
             </div>
            ''')
         show_outputs = True
@@ -417,7 +374,7 @@ def check_status(top_description):
         update_top_tag = gr.update(value='''
         <div class="gr-prose" style="max-width: 80%">
         <h2>Don't worry, your model is still training! ⌛</h2>
-        <p>You closed the tab while your model was training, but it's all good! It is still training right now. You can click the "Open logs" button above here to check the training status. Once training is done, reload this tab to interact with your model</p>
+        <p>You closed the tab while your model was training, but it's all good! It is still training right now. You can click the "Open logs" button above here to check the training status. Once training is done, reload this tab to interact with your model</p>
         </div>
        ''')
         show_outputs = False
@@ -446,7 +403,7 @@ with gr.Blocks(css=css) as demo:
         <div class="gr-prose" style="max-width: 80%">
         <h2>Attention - This Space doesn't work in this shared UI</h2>
         <p>For it to work, you can either run locally or duplicate the Space and run it on your own profile using a (paid) private T4 GPU for training. As each T4 costs US$0.60/h, it should cost < US$1 to train most models using default settings!&nbsp;&nbsp;<a class="duplicate-button" style="display:inline-block" target="_blank" href="https://huggingface.co/spaces/{os.environ['SPACE_ID']}?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14" alt="Duplicate Space"></a></p>
-        <img class="instruction" src="file/duplicate.png">
+        <img class="instruction" src="file/duplicate.png">
         <img class="arrow" src="file/arrow.png" />
         </div>
     ''')
@@ -455,39 +412,48 @@ with gr.Blocks(css=css) as demo:
             top_description = gr.HTML(f'''
             <div class="gr-prose" style="max-width: 80%">
             <h2>You have successfully associated a GPU to the Dreambooth Training Space 🎉</h2>
-            <p>Certify that you got a T4. You can now train your model! You will be billed by the minute from when you activated the GPU until when it is turned it off.</p>
+            <p>Certify that you got a T4. You can now train your model! You will be billed by the minute from when you activated the GPU until when it is turned it off.</p>
            </div>
            ''')
         else:
             top_description = gr.HTML(f'''
             <div class="gr-prose" style="max-width: 80%">
             <h2>You have successfully duplicated the Dreambooth Training Space 🎉</h2>
-            <p>There's only one step left before you can train your model: <a href="https://huggingface.co/spaces/{os.environ['SPACE_ID']}/settings" style="text-decoration: underline" target="_blank">attribute a <b>T4 GPU</b> to it (via the Settings tab)</a> and run the training below. Other GPUs are not compatible for now. You will be billed by the minute from when you activate the GPU until when it is turned it off.</p>
+            <p>There's only one step left before you can train your model: <a href="https://huggingface.co/spaces/{os.environ['SPACE_ID']}/settings" style="text-decoration: underline" target="_blank">attribute a <b>T4 GPU</b> to it (via the Settings tab)</a> and run the training below. Other GPUs are not compatible for now. You will be billed by the minute from when you activate the GPU until when it is turned it off.</p>
            </div>
            ''')
     else:
         top_description = gr.HTML(f'''
-        <div class="gr-prose" style="max-width: 80%">
-        <h2>You have successfully cloned the Dreambooth Training Space locally 🎉</h2>
-        <p>Do a <code>pip install requirements-local.txt</code></p>
+        <div style="text-align: center; max-width: 650px; margin: 0 auto;">
+            <div>
+                <img class="logo" src="file/mirage.png" alt="Mirage Logo"
+                    style="margin: auto; max-width: 7rem;">
+                <br />
+                <h1 style="font-weight: 900; font-size: 2.5rem;">
+                    Dreambooth Training UI
+                </h1>
+            </div>
+            <br />
+            <br />
+            <p style="margin-bottom: 10px; font-size: 94%">
+                Customize Stable Diffusion v1 or v2 by giving it a few examples of a concept.
+                Based on the <a href="https://github.com/huggingface/diffusers">diffusers</a> implementation, additional techniques from <a href="https://github.com/TheLastBen/diffusers">TheLastBen</a> and <a href="https://github.com/ShivamShrirao/diffusers">ShivamShrirao</a>"
+            </p>
         </div>
         ''')
-    gr.Markdown("# Dreambooth Training UI 💭")
-    gr.Markdown("Customize Stable Diffusion v1 or v2 (ⁿᵉʷ!) by giving it a few examples of a concept. Based on the [🧨 diffusers](https://github.com/huggingface/diffusers) implementation, additional techniques from [TheLastBen](https://github.com/TheLastBen/diffusers) and [ShivamShrirao](https://github.com/ShivamShrirao/diffusers)")
-
-    with gr.Row() as what_are_you_training:
-        type_of_thing = gr.Dropdown(label="What would you like to train?", choices=["object", "person", "style"], value="object", interactive=True)
-        base_model_to_use = gr.Dropdown(label="Which base model would you like to use?", choices=["v1-5", "v2-512", "v2-768"], value="v1-5", interactive=True)
-
-    #Very hacky approach to emulate dynamically created Gradio components
+
+    #Very hacky approach to emulate dynamically created Gradio components
     with gr.Row() as upload_your_concept:
         with gr.Column():
             thing_description = gr.Markdown("You are going to train an `object`, please upload 5-10 images of the object you are planning on training on from different angles/perspectives. You must have the right to do so and you are liable for the images you use, example")
             thing_experimental = gr.Checkbox(label="Improve faces (prior preservation) - can take longer training but can improve faces", visible=False, value=False)
             thing_image_example = gr.HTML('''<img src="file/cat-toy.png" />''')
             things_naming = gr.Markdown("You should name your concept with a unique made up word that has low chance of the model already knowing it (e.g.: `cttoy` here). Images will be automatically cropped to 512x512.")
-
+
         with gr.Column():
+            with gr.Row() as what_are_you_training:
+                base_model_to_use = gr.Dropdown(label="Which base model would you like to use?", choices=["v1-5", "v2-512", "v2-768"], value="v1-5", interactive=True)
+
             file_collection = []
             concept_collection = []
             buttons_collection = []
@@ -506,45 +472,45 @@ with gr.Blocks(css=css) as demo:
 
                 file_collection.append(gr.File(label=f'''Upload the images for your {ordinal(x+1) if (x>0) else ""} concept''', file_count="multiple", interactive=True, visible=visible))
                 with gr.Column(visible=visible) as row[x]:
-                    concept_collection.append(gr.Textbox(label=f'''{ordinal(x+1) if (x>0) else ""} concept prompt - use a unique, made up word to avoid collisions'''))
+                    concept_collection.append(gr.Textbox(label=f'''{ordinal(x+1) if (x>0) else ""} concept prompt - use a unique, made up word to avoid collisions'''))
                     with gr.Row():
                         if(x < maximum_concepts-1):
                             buttons_collection.append(gr.Button(value="Add +1 concept", visible=visible))
                         if(x > 0):
                             delete_collection.append(gr.Button(value=f"Delete {ordinal(x+1)} concept"))
-
+
     counter_add = 1
     for button in buttons_collection:
         if(counter_add < len(buttons_collection)):
             button.click(lambda:
             [gr.update(visible=True),gr.update(visible=True), gr.update(visible=False), gr.update(visible=True), True, None],
-            None,
+            None,
             [row[counter_add], file_collection[counter_add], buttons_collection[counter_add-1], buttons_collection[counter_add], is_visible[counter_add], file_collection[counter_add]], queue=False)
         else:
             button.click(lambda:[gr.update(visible=True),gr.update(visible=True), gr.update(visible=False), True], None, [row[counter_add], file_collection[counter_add], buttons_collection[counter_add-1], is_visible[counter_add]], queue=False)
         counter_add += 1
-
+
     counter_delete = 1
     for delete_button in delete_collection:
         if(counter_delete < len(delete_collection)+1):
             delete_button.click(lambda:[gr.update(visible=False),gr.update(visible=False), gr.update(visible=True), False], None, [file_collection[counter_delete], row[counter_delete], buttons_collection[counter_delete-1], is_visible[counter_delete]], queue=False)
         counter_delete += 1
-
+
     with gr.Accordion("Custom Settings", open=False):
         swap_auto_calculated = gr.Checkbox(label="Use custom settings")
         gr.Markdown("If not checked, the % of frozen encoder will be tuned automatically to whether you are training an `object`, `person` or `style`. The text-encoder is frozen after 10% of the steps for a style, 30% of the steps for an object and 75% trained for persons. The number of steps varies between 1400 and 2400 depending on how many images uploaded. If you see too many artifacts in your output, it means it may have overfit and you need less steps. If your results aren't really what you wanted, it may be underfitting and you need more steps.")
         steps = gr.Number(label="How many steps", value=2400)
         perc_txt_encoder = gr.Number(label="Percentage of the training steps the text-encoder should be trained as well", value=30)
-
+
     with gr.Box(visible=False) as training_summary:
         training_summary_text = gr.HTML("", visible=True, label="Training Summary")
         is_advanced_visible = True if is_spaces else False
         training_summary_checkbox = gr.Checkbox(label="Automatically remove paid GPU attribution and upload model to the Hugging Face Hub after training", value=True, visible=is_advanced_visible)
         training_summary_model_name = gr.Textbox(label="Name of your model", visible=True)
         training_summary_where_to_upload = gr.Dropdown(["My personal profile", "Public Library"], value="My personal profile", label="Upload to", visible=True)
-        training_summary_token_message = gr.Markdown("[A Hugging Face write access token](https://huggingface.co/settings/tokens), go to \"New token\" -> Role : Write. A regular read token won't work here.", visible=True)
+        training_summary_token_message = gr.Markdown("[A Hugging Face write access token](https://huggingface.co/settings/tokens), go to \"New token\" -> Role : Write. A regular read token won't work here.", visible=True)
         training_summary_token = gr.Textbox(label="Hugging Face Write Token", type="password", visible=True)
-
+
     train_btn = gr.Button("Start Training")
     if(is_shared_ui):
         training_ongoing = gr.Markdown("## This Space only works in duplicated instances. Please duplicate it and try again!", visible=False)
@@ -552,11 +518,11 @@ with gr.Blocks(css=css) as demo:
         training_ongoing = gr.Markdown("## Oops, you haven't associated your T4 GPU to this Space. Visit the Settings tab, associate and try again.", visible=False)
     else:
         training_ongoing = gr.Markdown("## Training is ongoing ⌛... You can close this tab if you like or just wait. If you did not check the `Remove GPU After training`, you can come back here to try your model and upload it after training. Don't forget to remove the GPU attribution after you are done. ", visible=False)
-
+
     #Post-training UI
-    completed_training = gr.Markdown('''# ✅ Training completed.
+    completed_training = gr.Markdown('''# ✅ Training completed.
     ### Don't forget to remove the GPU attribution after you are done trying and uploading your model''', visible=False)
-
+
     with gr.Row():
         with gr.Box(visible=False) as try_your_model:
             gr.Markdown("## Try your model")
@@ -564,54 +530,53 @@ with gr.Blocks(css=css) as demo:
564
  result_image = gr.Image()
565
  inference_steps = gr.Slider(minimum=1, maximum=150, value=50, step=1)
566
  generate_button = gr.Button("Generate Image")
567
-
568
  with gr.Box(visible=False) as push_to_hub:
569
  gr.Markdown("## Push to Hugging Face Hub")
570
  model_name = gr.Textbox(label="Name of your model", placeholder="Tarsila do Amaral Style")
571
  where_to_upload = gr.Dropdown(["My personal profile", "Public Library"], label="Upload to")
572
  gr.Markdown("[A Hugging Face write access token](https://huggingface.co/settings/tokens), go to \"New token\" -> Role : Write. A regular read token won't work here.")
573
  hf_token = gr.Textbox(label="Hugging Face Write Token", type="password")
574
-
575
  push_button = gr.Button("Push to the Hub")
576
-
577
  result = gr.File(label="Download the uploaded models in the diffusers format", visible=True)
578
  success_message_upload = gr.Markdown(visible=False)
579
  convert_button = gr.Button("Convert to CKPT", visible=False)
580
-
581
  #Swap the examples and the % of text encoder trained depending if it is an object, person or style
582
- type_of_thing.change(fn=swap_text, inputs=[type_of_thing, base_model_to_use], outputs=[thing_description, thing_image_example, things_naming, perc_txt_encoder, thing_experimental], queue=False, show_progress=False)
583
-
584
  #Swap the base model
585
- base_model_to_use.change(fn=swap_text, inputs=[type_of_thing, base_model_to_use], outputs=[thing_description, thing_image_example, things_naming, perc_txt_encoder, thing_experimental], queue=False, show_progress=False)
586
  base_model_to_use.change(fn=swap_base_model, inputs=base_model_to_use, outputs=[])
587
 
588
- #Update the summary box below the UI according to how many images are uploaded and whether users are using custom settings or not
589
  for file in file_collection:
590
  #file.change(fn=update_steps,inputs=file_collection, outputs=steps)
591
- file.change(fn=count_files, inputs=file_collection+[thing_experimental]+[base_model_to_use]+[type_of_thing]+[steps]+[perc_txt_encoder]+[swap_auto_calculated], outputs=[training_summary, training_summary_text], queue=False)
592
-
593
- thing_experimental.change(fn=count_files, inputs=file_collection+[thing_experimental]+[base_model_to_use]+[type_of_thing]+[steps]+[perc_txt_encoder]+[swap_auto_calculated], outputs=[training_summary, training_summary_text], queue=False)
594
- base_model_to_use.change(fn=count_files, inputs=file_collection+[thing_experimental]+[base_model_to_use]+[type_of_thing]+[steps]+[perc_txt_encoder]+[swap_auto_calculated], outputs=[training_summary, training_summary_text], queue=False)
595
- steps.change(fn=count_files, inputs=file_collection+[thing_experimental]+[base_model_to_use]+[type_of_thing]+[steps]+[perc_txt_encoder]+[swap_auto_calculated], outputs=[training_summary, training_summary_text], queue=False)
596
- perc_txt_encoder.change(fn=count_files, inputs=file_collection+[thing_experimental]+[base_model_to_use]+[type_of_thing]+[steps]+[perc_txt_encoder]+[swap_auto_calculated], outputs=[training_summary, training_summary_text], queue=False)
597
-
598
  #Give more options if the user wants to finish everything after training
599
  if(is_spaces):
600
  training_summary_checkbox.change(fn=checkbox_swap, inputs=training_summary_checkbox, outputs=[training_summary_token_message, training_summary_token, training_summary_model_name, training_summary_where_to_upload],queue=False, show_progress=False)
601
  #Add a message for while it is in training
602
  train_btn.click(lambda:gr.update(visible=True), inputs=None, outputs=training_ongoing)
603
-
604
  #The main train function
605
- train_btn.click(fn=train, inputs=is_visible+concept_collection+file_collection+[base_model_to_use]+[thing_experimental]+[training_summary_where_to_upload]+[training_summary_model_name]+[training_summary_checkbox]+[training_summary_token]+[type_of_thing]+[steps]+[perc_txt_encoder]+[swap_auto_calculated], outputs=[result, try_your_model, push_to_hub, convert_button, training_ongoing, completed_training], queue=False)
606
-
607
  #Button to generate an image from your trained model after training
608
  generate_button.click(fn=generate, inputs=[prompt, inference_steps], outputs=result_image, queue=False)
609
  #Button to push the model to the Hugging Face Hub
610
  push_button.click(fn=push, inputs=[model_name, where_to_upload, hf_token, base_model_to_use], outputs=[success_message_upload, result], queue=False)
611
- #Button to convert the model to ckpt format
612
  convert_button.click(fn=convert_to_ckpt, inputs=[], outputs=result, queue=False)
613
-
614
  #Checks if the training is running
615
  demo.load(fn=check_status, inputs=top_description, outputs=[top_description, try_your_model, push_to_hub, result, convert_button], queue=False, show_progress=False)
616
 
617
- demo.queue(default_enabled=False).launch(debug=True)
 
cat-toy-deprec.png DELETED
Binary file (928 kB)
 
cat-toy.png CHANGED
cattoy.png DELETED
Binary file (693 kB)
 
mirage.png ADDED