gustproof committed on
Commit 7582980
1 Parent(s): 5d53d8f

Update deresute/release_notes/v1.3.md

Files changed (1)
  1. deresute/release_notes/v1.3.md +9 -3
deresute/release_notes/v1.3.md CHANGED
@@ -315,6 +315,13 @@ Tags generated with [SmilingWolf/wd-v1-4-convnextv2-tagger-v2](https://huggingfa
  The data source also provides info about which other characters are also present in the card art ("cameos"), so following [a previous model](https://civitai.com/models/27600/)
  the caption template `MainChar/OtherChar1/OtherChar2..., other tags shuffled` was used. However, it worked poorly this time; character attributes blend noticeably.

+ Inspired by [a 2022 paper](https://openreview.net/pdf?id=Uad23IcIEs), a custom LR scheduler was used:
+ ```
+ SequentialLR(optimizer, [
+ LinearLR(optimizer, 0.1, total_iters=steps_per_epoch),
+ CosineAnnealingWarmRestarts(optimizer, steps_per_epoch*2, T_mult=2)
+ ], [steps_per_epoch])
+ ```

  Training cost: ~7 T4-hours (in addition to v1.1 and v1.2 costs)

@@ -438,8 +445,7 @@ cache_latents = false
  optimizer_type = "AdamW"
  learning_rate = 0.001
  max_grad_norm = 1.0
- lr_scheduler = "cosine"
- lr_warmup_steps = 100
+ # custom lr schedule

  [dataset_arguments]
  debug_dataset = false

@@ -452,7 +458,7 @@ save_every_n_epochs = 1
  max_token_length = 225
  mem_eff_attn = false
  xformers = true
- max_train_epochs = 20
+ max_train_epochs = 15
  max_data_loader_n_workers = 1
  persistent_data_loader_workers = false
  gradient_checkpointing = false
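
The scheduler snippet added in this commit is abbreviated (no imports, optimizer, or loop). Below is a minimal runnable sketch of the same schedule, linear warmup over the first epoch followed by cosine annealing with warm restarts whose cycle length doubles each restart, using the standard `torch.optim.lr_scheduler` classes. The `steps_per_epoch` value, the toy model/optimizer, and the loop length are placeholders, not values from the release notes.

```
import torch
from torch.optim.lr_scheduler import (
    SequentialLR,
    LinearLR,
    CosineAnnealingWarmRestarts,
)

steps_per_epoch = 100          # placeholder; depends on dataset size and batch size
model = torch.nn.Linear(4, 4)  # stand-in for the parameters being trained
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)  # matches learning_rate = 0.001

# Epoch 1: linear warmup from 10% to 100% of the base LR.
# Afterwards: cosine annealing with warm restarts; first cycle spans 2 epochs,
# and each subsequent cycle is twice as long (T_mult=2).
scheduler = SequentialLR(
    optimizer,
    schedulers=[
        LinearLR(optimizer, start_factor=0.1, total_iters=steps_per_epoch),
        CosineAnnealingWarmRestarts(optimizer, T_0=steps_per_epoch * 2, T_mult=2),
    ],
    milestones=[steps_per_epoch],
)

for step in range(steps_per_epoch * 15):  # 15 epochs, as in max_train_epochs above
    optimizer.step()   # actual forward/backward pass omitted
    scheduler.step()   # advance the LR schedule once per optimizer step
```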