gustproof committed on
Commit 7582980
1 Parent(s): 5d53d8f

Update deresute/release_notes/v1.3.md

Files changed (1)
  1. deresute/release_notes/v1.3.md +9 -3
deresute/release_notes/v1.3.md CHANGED
@@ -315,6 +315,13 @@ Tags generated with [SmilingWolf/wd-v1-4-convnextv2-tagger-v2](https://huggingfa
  The data source also provides info about which other characters are also present in the card art ("cameos"), so following [a previous model](https://civitai.com/models/27600/)
  the caption template `MainChar/OtherChar1/OtherChar2..., other tags shuffled` was used. However, it worked poorly this time; character attributes blend noticeably.

+ Inspired by [a 2022 paper](https://openreview.net/pdf?id=Uad23IcIEs), a custom LR scheduler was used:
+ ```
+ SequentialLR(optimizer, [
+ LinearLR(optimizer, 0.1, total_iters=steps_per_epoch),
+ CosineAnnealingWarmRestarts(optimizer, steps_per_epoch*2, T_mult=2)
+ ], [steps_per_epoch])
+ ```

  Training cost: ~7 T4-hours (in addition to v1.1 and v1.2 costs)

@@ -438,8 +445,7 @@ cache_latents = false
  optimizer_type = "AdamW"
  learning_rate = 0.001
  max_grad_norm = 1.0
- lr_scheduler = "cosine"
- lr_warmup_steps = 100
+ # custom lr schedule

  [dataset_arguments]
  debug_dataset = false

@@ -452,7 +458,7 @@ save_every_n_epochs = 1
  max_token_length = 225
  mem_eff_attn = false
  xformers = true
- max_train_epochs = 20
+ max_train_epochs = 15
  max_data_loader_n_workers = 1
  persistent_data_loader_workers = false
  gradient_checkpointing = false
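
The scheduler snippet added in this commit is abbreviated (no imports, optimizer, or loop). Below is a minimal runnable sketch of the same schedule, linear warmup over the first epoch followed by cosine annealing with warm restarts whose cycle length doubles each restart, using the standard `torch.optim.lr_scheduler` classes. The `steps_per_epoch` value, the toy model/optimizer, and the loop length are placeholders, not values from the release notes.

```
import torch
from torch.optim.lr_scheduler import (
    SequentialLR,
    LinearLR,
    CosineAnnealingWarmRestarts,
)

steps_per_epoch = 100          # placeholder; depends on dataset size and batch size
model = torch.nn.Linear(4, 4)  # stand-in for the parameters being trained
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)  # matches learning_rate = 0.001

# Epoch 1: linear warmup from 10% to 100% of the base LR.
# Afterwards: cosine annealing with warm restarts; first cycle spans 2 epochs,
# and each subsequent cycle is twice as long (T_mult=2).
scheduler = SequentialLR(
    optimizer,
    schedulers=[
        LinearLR(optimizer, start_factor=0.1, total_iters=steps_per_epoch),
        CosineAnnealingWarmRestarts(optimizer, T_0=steps_per_epoch * 2, T_mult=2),
    ],
    milestones=[steps_per_epoch],
)

for step in range(steps_per_epoch * 15):  # 15 epochs, as in max_train_epochs above
    optimizer.step()   # actual forward/backward pass omitted
    scheduler.step()   # advance the LR schedule once per optimizer step
```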