pxovela committed on
Commit
afaaa2e
1 Parent(s): 914aac9

Upload 5 files

Browse files
v4 configs_and_logs/events.out.tfevents.1686011608.irakli-pc.32663.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab88b7b1f9577b163f7e980a38a6cf81525e5f360de5e77e3579e8c4908f030a
3
+ size 116841153
v4 configs_and_logs/optimizer.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "doc": {
3
+ "base": "base optimizer configuration for unet and text encoder",
4
+ "text_encoder_overrides": "text encoder config overrides",
5
+ "text_encoder_lr_scale": "if LR not set on text encoder, sets the LR to a multiple of the base LR. For example, if base `lr` is 2e-6 and `text_encoder_lr_scale` is 0.5, the text encoder's LR will be set to `1e-6`.",
6
+ "-----------------": "-----------------",
7
+ "optimizer": "adamw, adamw8bit, lion",
8
+ "optimizer_desc": "'adamw' is standard 32bit, 'adamw8bit' is bitsandbytes, 'lion' is lucidrains",
9
+ "lr": "learning rate, if null will use CLI or main JSON config value",
10
+ "lr_scheduler": "'constant' or 'cosine'",
11
+ "lr_warmup_steps": "number of steps to warm up LR to target LR, if null will use CLI or default to a value based on max epochs",
12
+ "lr_decay_steps": "number of steps to decay LR to zero for cosine, if null will use CLI or default to a value based on max epochs",
13
+ "betas": "exponential decay rates for the moment estimates",
14
+ "epsilon": "value added to denominator for numerical stability, unused for lion",
15
+ "weight_decay": "weight decay (L2 penalty)",
16
+ "------------------": "-----------------",
17
+ "freeze_embeddings": "whether to freeze the text embeddings",
18
+ "freeze_front_n_layers": "if not null, freeze the front N layers of the text encoder (you can pass eg -2 to leave only the last 2 layers unfrozen)",
19
+ "freeze_final_layer_norm": "whether to freeze the text encoder's final layer norm"
20
+ },
21
+ "base": {
22
+ "optimizer": "adamw",
23
+ "lr": 5e-8,
24
+ "lr_scheduler": "constant",
25
+ "lr_decay_steps": null,
26
+ "lr_warmup_steps": null,
27
+ "betas": [0.9, 0.999],
28
+ "epsilon": 1e-8,
29
+ "weight_decay": 0.010
30
+ },
31
+ "text_encoder_overrides": {
32
+ "optimizer": null,
33
+ "lr": 1.5e-7,
34
+ "lr_scheduler": null,
35
+ "lr_decay_steps": null,
36
+ "lr_warmup_steps": null,
37
+ "betas": null,
38
+ "epsilon": null,
39
+ "weight_decay": null
40
+ },
41
+ "text_encoder_freezing": {
42
+ "freeze_embeddings": false,
43
+ "freeze_front_n_layers": null,
44
+ "freeze_final_layer_norm": false
45
+ }
46
+ }
v4 configs_and_logs/train.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "batch_size": 4,
3
+ "ckpt_every_n_minutes": null,
4
+ "clip_grad_norm": null,
5
+ "clip_skip": 0,
6
+ "cond_dropout": 0.05,
7
+ "data_root": "/home/irakli/EveryDream2trainer/input/vodka_v4",
8
+ "disable_amp": false,
9
+ "disable_textenc_training": false,
10
+ "disable_xformers": false,
11
+ "flip_p": 0.0,
12
+ "gpuid": 0,
13
+ "gradient_checkpointing": true,
14
+ "grad_accum": 1,
15
+ "logdir": "logs",
16
+ "log_step": 1,
17
+ "lowvram": false,
18
+ "lr_decay_steps": 0,
19
+ "lr_scheduler": "constant",
20
+ "lr_warmup_steps": null,
21
+ "max_epochs": 100,
22
+ "notebook": false,
23
+ "optimizer_config": "optimizer.json",
24
+ "project_name": "vodka_v4_2",
25
+ "resolution": 512,
26
+ "resume_ckpt": "sd_v1-5_vae",
27
+ "run_name": null,
28
+ "sample_prompts": "sample_prompts.txt",
29
+ "sample_steps": 10000,
30
+ "save_ckpt_dir": null,
31
+ "save_ckpts_from_n_epochs": 0,
32
+ "save_every_n_epochs": 25,
33
+ "save_optimizer": false,
34
+ "scale_lr": false,
35
+ "seed": 555,
36
+ "shuffle_tags": false,
37
+ "validation_config": "validation_default.json",
38
+ "wandb": false,
39
+ "write_schedule": false,
40
+ "rated_dataset": false,
41
+ "rated_dataset_target_dropout_percent": 50,
42
+ "zero_frequency_noise_ratio": 0.00
43
+ }
v4 configs_and_logs/validation_default.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "documentation": {
3
+ "validate_training": "If true, validate the training using a separate set of image/caption pairs, and log the results as `loss/val`. The curve will trend downwards as the model trains, then flatten and start to trend upwards as effective training finishes and the model begins to overfit the training data. Very useful for preventing overfitting, for checking if your learning rate is too low or too high, and for deciding when to stop training.",
4
+ "val_split_mode": "Either 'automatic' or 'manual', ignored if validate_training is false. 'automatic' val_split_mode picks a random subset of the training set (the number of items is controlled by auto_split_proportion) and removes them from training to use as a validation set. 'manual' val_split_mode lets you provide your own folder of validation items (images+captions), specified using 'manual_data_root'.",
5
+ "auto_split_proportion": "For 'automatic' val_split_mode, the proportion of the train dataset that should be removed to use for validation. Typical values are 0.15-0.2 (15-20% of the total dataset). Higher is more accurate but slower.",
6
+ "manual_data_root": "For 'manual' val_split_mode, the path to a folder containing validation items.",
7
+ "extra_manual_datasets": "Dictionary of 'name':'path' pairs defining additional validation datasets to load and log. eg { 'santa_suit': '/path/to/captioned_santa_suit_images', 'flamingo_suit': '/path/to/flamingo_suit_images' }",
8
+ "stabilize_training_loss": "If true, stabilize the train loss curves for `loss/epoch` and `loss/log step` by re-calculating training loss with a fixed random seed, and log the results as `loss/train-stabilized`. This more clearly shows the training progress, but it is not enough alone to tell you if you're overfitting.",
9
+ "stabilize_split_proportion": "For stabilize_training_loss, the proportion of the train dataset to overlap for stabilizing the train loss graph. Typical values are 0.15-0.2 (15-20% of the total dataset). Higher is more accurate but slower.",
10
+ "every_n_epochs": "How often to run validation (1=every epoch, 2=every second epoch; 0.5=twice per epoch, 0.33=three times per epoch, etc.).",
11
+ "seed": "The seed to use when running validation and stabilization passes.",
12
+ "use_relative_loss": "If true, logs loss/val as negative relative to the first pre-train loss/val value"
13
+ },
14
+ "validate_training": true,
15
+ "val_split_mode": "automatic",
16
+ "auto_split_proportion": 0.15,
17
+ "manual_data_root": null,
18
+ "extra_manual_datasets" : {},
19
+ "stabilize_training_loss": false,
20
+ "stabilize_split_proportion": 0.15,
21
+ "every_n_epochs": 1,
22
+ "seed": 555,
23
+ "use_relative_loss": false
24
+ }
v4 configs_and_logs/vodka_v4_2-20230606-083312.log ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 06/06/2023 08:33:12 AM Seed: 555
2
+ 06/06/2023 08:33:12 AM unet attention_head_dim: 8
3
+ 06/06/2023 08:33:12 AM Inferred yaml: v1-inference.yaml, attn: sd1, prediction_type: epsilon
4
+ 06/06/2023 08:33:27 AM Enabled xformers
5
+ 06/06/2023 08:33:28 AM Successfully compiled models
6
+ 06/06/2023 08:33:28 AM * DLMA resolution 512, buckets: [[512, 512], [576, 448], [448, 576], [640, 384], [384, 640], [768, 320], [320, 768], [896, 256], [256, 896], [1024, 256], [256, 1024]]
7
+ 06/06/2023 08:33:28 AM Preloading images...
8
+ 06/06/2023 08:38:21 AM * Removed 1628 images from the training set to use for validation
9
+ 06/06/2023 08:38:21 AM * DLMA initialized with 1628 images.
10
+ 06/06/2023 08:38:22 AM ** Dataset 'val': 411 batches, num_images: 1644, batch_size: 4
11
+ 06/06/2023 08:38:22 AM * Aspect ratio bucket (256, 896) has only 1 images. At batch size 4 this makes for an effective multiplier of 4.0, which may cause problems. Consider adding 3 or more images for aspect ratio 2:7, or reducing your batch_size.
12
+ 06/06/2023 08:38:22 AM * DLMA initialized with 9223 images.
13
+ 06/06/2023 08:38:22 AM ** Dataset 'train': 2310 batches, num_images: 9240, batch_size: 4
14
+ 06/06/2023 08:38:22 AM  * text encoder optimizer: AdamW (196 parameters) *
15
+ 06/06/2023 08:38:22 AM  lr: 1.5e-07, betas: [0.9, 0.999], epsilon: 1e-08, weight_decay: 0.01 *
16
+ 06/06/2023 08:38:22 AM  * unet optimizer: AdamW (686 parameters) *
17
+ 06/06/2023 08:38:22 AM  lr: 5e-08, betas: [0.9, 0.999], epsilon: 1e-08, weight_decay: 0.01 *
18
+ 06/06/2023 08:38:22 AM Grad scaler enabled: True (amp mode)
19
+ 06/06/2023 08:38:22 AM Pretraining GPU Memory: 7007 / 24576 MB
20
+ 06/06/2023 08:38:22 AM saving ckpts every 1000000000.0 minutes
21
+ 06/06/2023 08:38:22 AM saving ckpts every 25 epochs
22
+ 06/06/2023 08:38:22 AM unet device: cuda:0, precision: torch.float32, training: True
23
+ 06/06/2023 08:38:22 AM text_encoder device: cuda:0, precision: torch.float32, training: True
24
+ 06/06/2023 08:38:22 AM vae device: cuda:0, precision: torch.float16, training: False
25
+ 06/06/2023 08:38:22 AM scheduler: <class 'diffusers.schedulers.scheduling_ddpm.DDPMScheduler'>
26
+ 06/06/2023 08:38:22 AM Project name: vodka_v4_2
27
+ 06/06/2023 08:38:22 AM grad_accum: 1
28
+ 06/06/2023 08:38:22 AM batch_size: 4
29
+ 06/06/2023 08:38:22 AM epoch_len: 2310
30
+ 06/07/2023 02:35:05 AM Saving model, 25 epochs at step 57750
31
+ 06/07/2023 02:35:05 AM * Saving diffusers model to logs/vodka_v4_2_20230606-083312/ckpts/vodka_v4_2-ep25-gs57750
32
+ 06/07/2023 02:35:10 AM * Saving SD model to ./vodka_v4_2-ep25-gs57750.ckpt
33
+ 06/07/2023 06:18:16 PM Saving model, 25 epochs at step 115500
34
+ 06/07/2023 06:18:16 PM * Saving diffusers model to logs/vodka_v4_2_20230606-083312/ckpts/vodka_v4_2-ep50-gs115500
35
+ 06/07/2023 06:18:31 PM * Saving SD model to ./vodka_v4_2-ep50-gs115500.ckpt
36
+ 06/08/2023 11:19:53 AM Saving model, 25 epochs at step 173250
37
+ 06/08/2023 11:19:53 AM * Saving diffusers model to logs/vodka_v4_2_20230606-083312/ckpts/vodka_v4_2-ep75-gs173250
38
+ 06/08/2023 11:20:17 AM * Saving SD model to ./vodka_v4_2-ep75-gs173250.ckpt
39
+ 06/09/2023 03:45:11 AM * Saving diffusers model to logs/vodka_v4_2_20230606-083312/ckpts/last-vodka_v4_2-ep99-gs231000
40
+ 06/09/2023 03:45:15 AM * Saving SD model to ./last-vodka_v4_2-ep99-gs231000.ckpt
41
+ 06/09/2023 03:45:33 AM Training complete
42
+ 06/09/2023 03:45:33 AM Total training time took 4027.18 minutes, total steps: 231000
43
+ 06/09/2023 03:45:33 AM Average epoch time: 40.23 minutes
44
+ 06/09/2023 03:45:33 AM  ***************************
45
+ 06/09/2023 03:45:33 AM  **** Finished training ****
46
+ 06/09/2023 03:45:33 AM  ***************************