multimodalart committed
Commit 88a47b3
1 Parent(s): 8d49336

Change defaults and shuffle class images

Files changed (2):
  1. app.py +34 -44
  2. train_dreambooth.py +2 -0
app.py CHANGED
@@ -36,30 +36,30 @@ safety_checker = snapshot_download(repo_id="multimodalart/sd-sc")
 
 model_to_load = model_v1
 
-#with zipfile.ZipFile("mix.zip", 'r') as zip_ref:
-#    zip_ref.extractall(".")
+with zipfile.ZipFile("mix.zip", 'r') as zip_ref:
+    zip_ref.extractall(".")
 
 def swap_text(option):
     mandatory_liability = "You must have the right to do so and you are liable for the images you use, example:"
     if(option == "object"):
         instance_prompt_example = "cttoy"
-        freeze_for = 50
+        freeze_for = 30
         return [f"You are going to train `object`(s), upload 5-10 images of each object you are planning on training on from different angles/perspectives. {mandatory_liability}:", '''<img src="file/cat-toy.png" />''', f"You should name your concept with a unique made up word that has low chance of the model already knowing it (e.g.: `{instance_prompt_example}` here). Images will be automatically cropped to 512x512.", freeze_for, gr.update(visible=False)]
     elif(option == "person"):
         instance_prompt_example = "julcto"
-        freeze_for = 65
-        return [f"You are going to train a `person`(s), upload 10-20 images of each person you are planning on training on from different angles/perspectives. {mandatory_liability}:", '''<img src="file/person.png" />''', f"You should name the files with a unique word that represent your concept (e.g.: `{instance_prompt_example}` here). Images will be automatically cropped to 512x512.", freeze_for, gr.update(visible=False)]
+        freeze_for = 70
+        return [f"You are going to train a `person`(s), upload 10-20 images of each person you are planning on training on from different angles/perspectives. {mandatory_liability}:", '''<img src="file/person.png" />''', f"You should name your concept with a unique made up word that has low chance of the model already knowing it (e.g.: `{instance_prompt_example}` here). Images will be automatically cropped to 512x512.", freeze_for, gr.update(visible=True)]
     elif(option == "style"):
         instance_prompt_example = "trsldamrl"
         freeze_for = 10
-        return [f"You are going to train a `style`, upload 10-20 images of the style you are planning on training on. Name the files with the words you would like {mandatory_liability}:", '''<img src="file/trsl_style.png" />''', f"You should name your files with a unique word that represent your concept (e.g.: `{instance_prompt_example}` here). Images will be automatically cropped to 512x512.", freeze_for, gr.update(visible=False)]
+        return [f"You are going to train a `style`, upload 10-20 images of the style you are planning on training on. Name the files with the words you would like {mandatory_liability}:", '''<img src="file/trsl_style.png" />''', f"You should name your concept with a unique made up word that has low chance of the model already knowing it (e.g.: `{instance_prompt_example}` here). Images will be automatically cropped to 512x512.", freeze_for, gr.update(visible=False)]
 
 def swap_base_model(selected_model):
     global model_to_load
     if(selected_model == "v1-5"):
         model_to_load = model_v1
-    elif(selected_model == "v2-768"):
-        model_to_load = model_v2
+    #elif(selected_model == "v2-768"):
+    #    model_to_load = model_v2
     else:
         model_to_load = model_v2_512
 
 
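Note: the last item of each swap_text() return is a gr.update that drives the visibility of the experimental face-improvement checkbox; after this commit only the `person` option reveals it. A minimal sketch of that wiring in Gradio (component names simplified; the exact event hookup shown here is an assumption, not the app's code):

import gradio as gr

def toggle_face_checkbox(option):
    # After this commit: visible only when training a person.
    return gr.update(visible=(option == "person"))

with gr.Blocks() as demo:
    type_of_thing = gr.Dropdown(["object", "person", "style"], value="object")
    thing_experimental = gr.Checkbox(label="Improve faces (prior preservation)", visible=False)
    type_of_thing.change(fn=toggle_face_checkbox, inputs=type_of_thing, outputs=thing_experimental)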
@@ -78,6 +78,10 @@ def count_files(*inputs):
         Training_Steps = int(inputs[-3])
     else:
         Training_Steps = file_counter*200
+        if(Training_Steps > 2400):
+            Training_Steps=2400
+        elif(Training_Steps < 1400):
+            Training_Steps=1400
     if(is_spaces):
         summary_sentence = f'''You are going to train {concept_counter} {type_of_thing}(s), with {file_counter} images for {Training_Steps} steps. The training should take around {round(Training_Steps/1.1, 2)} seconds, or {round((Training_Steps/1.1)/60, 2)} minutes.
         The setup, compression and uploading the model can take up to 20 minutes.<br>As the T4-Small GPU costs US$0.60 for 1h, <span style="font-size: 120%"><b>the estimated cost for this training is US${round((((Training_Steps/1.1)/3600)+0.3+0.1)*0.60, 2)}.</b></span><br><br>
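Note: the new clamp bounds the auto-calculated step count. A minimal sketch of the rule with worked values (the helper function is illustrative, not part of the app):

def auto_training_steps(file_counter):
    # Auto rule after this commit: 200 steps per uploaded image,
    # clamped to the band [1400, 2400].
    return max(1400, min(file_counter * 200, 2400))

assert auto_training_steps(5) == 1400   # 1000 raw, raised to the floor
assert auto_training_steps(10) == 2000  # inside the band, unchanged
assert auto_training_steps(15) == 2400  # 3000 raw, capped at the ceiling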
@@ -151,14 +155,21 @@ def train(*inputs):
         Training_Steps = int(inputs[-3])
         Train_text_encoder_for = int(inputs[-2])
     else:
-        Training_Steps = file_counter*200
         if(type_of_thing == "object"):
             Train_text_encoder_for=30
+
         elif(type_of_thing == "style"):
             Train_text_encoder_for=15
+
         elif(type_of_thing == "person"):
-            Train_text_encoder_for=65
-
+            Train_text_encoder_for=75
+
+        Training_Steps = file_counter*200
+        if(Training_Steps > 2400):
+            Training_Steps=2400
+        elif(Training_Steps < 1400):
+            Training_Steps=1400
+
     stptxt = int((Training_Steps*Train_text_encoder_for)/100)
     if (type_of_thing == "object" or type_of_thing == "style" or (type_of_thing == "person" and not experimental_face_improvement)):
         args_general = argparse.Namespace(
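Note: stptxt converts the text-encoder percentage into an absolute step budget via int((Training_Steps*Train_text_encoder_for)/100). Worked examples with the new defaults (the helper name is illustrative):

def text_encoder_steps(training_steps, train_text_encoder_for):
    # Same arithmetic as the diff: percentage of total steps, truncated to int.
    return int((training_steps * train_text_encoder_for) / 100)

assert text_encoder_steps(2400, 75) == 1800  # person at the 2400-step ceiling
assert text_encoder_steps(1400, 10) == 140   # style at the 1400-step floor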
@@ -187,12 +198,12 @@ def train(*inputs):
         lock_file.close()
         run_training(args_general)
     else:
-        args_txt_encoder = argparse.Namespace(
-            image_captions_filename=True,
-            train_text_encoder=True,
-            dump_only_text_encoder=True,
-            pretrained_model_name_or_path=model_to_load,
-            save_n_steps=0,
+        args_general = argparse.Namespace(
+            image_captions_filename = True,
+            train_text_encoder = True if stptxt > 0 else False,
+            stop_text_encoder_training = stptxt,
+            save_n_steps = 0,
+            pretrained_model_name_or_path = model_to_load,
             instance_data_dir="instance_images",
             class_data_dir="Mix",
             output_dir="output_model",
@@ -204,38 +215,17 @@ def train(*inputs):
             mixed_precision="fp16",
             train_batch_size=1,
             gradient_accumulation_steps=1,
-            gradient_checkpointing=True,
-            use_8bit_adam=True,
-            learning_rate=2e-6,
-            lr_scheduler="polynomial",
-            lr_warmup_steps = 0,
-            max_train_steps=stptxt,
-            num_class_images=200
-        )
-        args_unet = argparse.Namespace(
-            image_captions_filename=True,
-            train_only_unet=True,
-            save_n_steps=0,
-            pretrained_model_name_or_path=model_to_load,
-            instance_data_dir="instance_images",
-            output_dir="output_model",
-            instance_prompt="",
-            seed=42,
-            resolution=512,
-            mixed_precision="fp16",
-            train_batch_size=1,
-            gradient_accumulation_steps=1,
             use_8bit_adam=True,
             learning_rate=2e-6,
             lr_scheduler="polynomial",
             lr_warmup_steps = 0,
             max_train_steps=Training_Steps,
+            num_class_images=200,
         )
         print("Starting multi-training...")
         lock_file = open("intraining.lock", "w")
         lock_file.close()
-        run_training(args_txt_encoder)
-        run_training(args_unet)
+        run_training(args_general)
     gc.collect()
     torch.cuda.empty_cache()
     if(which_model == "v1-5"):
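Note: the old path ran two trainings (a dump_only_text_encoder pass, then a train_only_unet pass); the new args_general runs once and freezes the text encoder after stop_text_encoder_training steps. A minimal sketch of that single-pass schedule (illustrative loop, not the actual code in train_dreambooth.py):

def training_schedule(max_train_steps, stop_text_encoder_training):
    # Both models start training; text-encoder updates stop at stptxt.
    for step in range(max_train_steps):
        # yields (step, train_unet, train_text_encoder)
        yield step, True, step < stop_text_encoder_training

schedule = list(training_schedule(2400, 1800))
assert schedule[1799] == (1799, True, True)   # both models still updating
assert schedule[1800] == (1800, True, False)  # UNet only from here on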
@@ -453,7 +443,7 @@ with gr.Blocks(css=css) as demo:
         with gr.Row() as upload_your_concept:
             with gr.Column():
                 thing_description = gr.Markdown("You are going to train an `object`, please upload 5-10 images of the object you are planning on training on from different angles/perspectives. You must have the right to do so and you are liable for the images you use, example")
-                thing_experimental = gr.Checkbox(label="Improve faces (experimental) - takes 1.5x times training, can improve if you are training people's faces", visible=False, value=False)
+                thing_experimental = gr.Checkbox(label="Improve faces (prior preservation) - can take longer training but can improve faces", visible=False, value=False)
                 thing_image_example = gr.HTML('''<img src="file/cat-toy.png" />''')
                 things_naming = gr.Markdown("You should name your concept with a unique made up word that has low chance of the model already knowing it (e.g.: `cttoy` here). Images will be automatically cropped to 512x512.")
 
@@ -502,8 +492,8 @@ with gr.Blocks(css=css) as demo:
 
         with gr.Accordion("Custom Settings", open=False):
             swap_auto_calculated = gr.Checkbox(label="Use custom settings")
-            gr.Markdown("If not checked, the number of steps and % of frozen encoder will be tuned automatically according to the amount of images you upload and whether you are training an `object`, `person` or `style` as follows: The number of steps is calculated by number of images uploaded multiplied by 200. The text-encoder is frozen after 10% of the steps for a style, 30% of the steps for an object and 65% trained for persons.")
-            steps = gr.Number(label="How many steps", value=800)
+            gr.Markdown("If not checked, the % of frozen encoder will be tuned automatically to whether you are training an `object`, `person` or `style`. The text-encoder is frozen after 10% of the steps for a style, 30% of the steps for an object and 75% trained for persons. The number of steps varies between 1400 and 2400 depending on how many images uploaded. If you see too many artifacts in your output, it means it may have overfit and you need less steps. If your results aren't really what you wanted, it may be underfitting and you need more steps.")
+            steps = gr.Number(label="How many steps", value=2400)
             perc_txt_encoder = gr.Number(label="Percentage of the training steps the text-encoder should be trained as well", value=30)
 
         with gr.Box(visible=False) as training_summary:
@@ -552,7 +542,7 @@ with gr.Blocks(css=css) as demo:
 
     #Update the summary box below the UI according to how many images are uploaded and whether users are using custom settings or not
     for file in file_collection:
-        file.change(fn=update_steps,inputs=file_collection, outputs=steps)
+        #file.change(fn=update_steps,inputs=file_collection, outputs=steps)
         file.change(fn=count_files, inputs=file_collection+[type_of_thing]+[steps]+[perc_txt_encoder]+[swap_auto_calculated], outputs=[training_summary, training_summary_text], queue=False)
 
     steps.change(fn=count_files, inputs=file_collection+[type_of_thing]+[steps]+[perc_txt_encoder]+[swap_auto_calculated], outputs=[training_summary, training_summary_text], queue=False)
 
train_dreambooth.py CHANGED
@@ -7,6 +7,7 @@ from typing import Optional
 import subprocess
 import sys
 import gc
+import random
 
 import torch
 import torch.nn.functional as F
@@ -301,6 +302,7 @@ class DreamBoothDataset(Dataset):
         self.class_data_root = Path(class_data_root)
         self.class_data_root.mkdir(parents=True, exist_ok=True)
         self.class_images_path = list(self.class_data_root.iterdir())
+        random.shuffle(self.class_images_path)
         self.num_class_images = len(self.class_images_path)
         self._length = max(self.num_class_images, self.num_instance_images)
         self.class_prompt = class_prompt
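Note: Path.iterdir() yields class (regularization) images in arbitrary filesystem order, often grouped by how the "Mix" archive was packed, so without shuffling each dataset index always pairs with the same class image. A minimal sketch of what the added shuffle changes (stub class; the index % num_class_images pairing is an assumption based on the usual DreamBooth dataset layout, and the real DreamBoothDataset holds many more fields):

import random
from pathlib import Path

class ClassImageSamplerSketch:
    # Stub of the relevant slice of DreamBoothDataset.
    def __init__(self, class_data_root):
        self.class_images_path = list(Path(class_data_root).iterdir())
        random.shuffle(self.class_images_path)  # the line this commit adds
        self.num_class_images = len(self.class_images_path)

    def class_image_for(self, index):
        # Assumed pairing: wrap around the (now shuffled) class-image list.
        return self.class_images_path[index % self.num_class_images]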
 