{ "doc": { "base": "base optimizer configuration for unet and text encoder", "text_encoder_overrides": "text encoder config overrides", "text_encoder_lr_scale": "if LR is not set on the text encoder, sets its LR to a multiple of the base LR. For example, if base `lr` is 2e-6 and `text_encoder_lr_scale` is 0.5, the text encoder's LR will be set to `1e-6`.", "-----------------": "-----------------", "optimizer": "adamw, adamw8bit, lion", "optimizer_desc": "'adamw' is standard 32bit, 'adamw8bit' is from bitsandbytes, 'lion' is from lucidrains", "lr": "learning rate, if null will use CLI or main JSON config value", "lr_scheduler": "'constant' or 'cosine'", "lr_warmup_steps": "number of steps to warm up LR to target LR, if null will use CLI or default to a value based on max epochs", "lr_decay_steps": "number of steps to decay LR to zero for cosine, if null will use CLI or default to a value based on max epochs", "betas": "exponential decay rates for the moment estimates", "epsilon": "value added to denominator for numerical stability, unused for lion", "weight_decay": "weight decay (L2 penalty)", "------------------": "-----------------", "freeze_embeddings": "whether to freeze the text embeddings", "freeze_front_n_layers": "if not null, freeze the front N layers of the text encoder (you can pass e.g. -2 to leave only the last 2 layers unfrozen)", "freeze_final_layer_norm": "whether to freeze the text encoder's final layer norm" }, "base": { "optimizer": "adamw", "lr": 5e-8, "lr_scheduler": "constant", "lr_decay_steps": null, "lr_warmup_steps": null, "betas": [0.9, 0.999], "epsilon": 1e-8, "weight_decay": 0.010 }, "text_encoder_overrides": { "optimizer": null, "lr": 1.5e-7, "lr_scheduler": null, "lr_decay_steps": null, "lr_warmup_steps": null, "betas": null, "epsilon": null, "weight_decay": null }, "text_encoder_freezing": { "freeze_embeddings": false, "freeze_front_n_layers": null, "freeze_final_layer_norm": false } }