MrForExample committed on
Commit
e71f0ac
·
verified ·
1 Parent(s): f8eccbc

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. train_enc_dc_f32c32_EqM/.hydra/config.yaml +52 -0
  2. train_enc_dc_f32c32_EqM/.hydra/hydra.yaml +172 -0
  3. train_enc_dc_f32c32_EqM/.hydra/overrides.yaml +5 -0
  4. train_enc_dc_f32c32_EqM/checkpoints/best/custom_checkpoint_0.pkl +3 -0
  5. train_enc_dc_f32c32_EqM/checkpoints/best/model.safetensors +3 -0
  6. train_enc_dc_f32c32_EqM/checkpoints/best/model_1.safetensors +3 -0
  7. train_enc_dc_f32c32_EqM/checkpoints/best/model_2.safetensors +3 -0
  8. train_enc_dc_f32c32_EqM/checkpoints/best/model_ae.safetensors +3 -0
  9. train_enc_dc_f32c32_EqM/checkpoints/best/model_ae_ema.safetensors +3 -0
  10. train_enc_dc_f32c32_EqM/checkpoints/best/model_aux_losses.safetensors +3 -0
  11. train_enc_dc_f32c32_EqM/checkpoints/best/optimizer.bin +3 -0
  12. train_enc_dc_f32c32_EqM/checkpoints/best/random_states_0.pkl +3 -0
  13. train_enc_dc_f32c32_EqM/checkpoints/best/random_states_1.pkl +3 -0
  14. train_enc_dc_f32c32_EqM/checkpoints/best/random_states_2.pkl +3 -0
  15. train_enc_dc_f32c32_EqM/checkpoints/best/random_states_3.pkl +3 -0
  16. train_enc_dc_f32c32_EqM/checkpoints/best/random_states_4.pkl +3 -0
  17. train_enc_dc_f32c32_EqM/checkpoints/best/random_states_5.pkl +3 -0
  18. train_enc_dc_f32c32_EqM/checkpoints/best/random_states_6.pkl +3 -0
  19. train_enc_dc_f32c32_EqM/checkpoints/best/random_states_7.pkl +3 -0
  20. train_enc_dc_f32c32_EqM/checkpoints/last/custom_checkpoint_0.pkl +3 -0
  21. train_enc_dc_f32c32_EqM/checkpoints/last/model.safetensors +3 -0
  22. train_enc_dc_f32c32_EqM/checkpoints/last/model_1.safetensors +3 -0
  23. train_enc_dc_f32c32_EqM/checkpoints/last/model_2.safetensors +3 -0
  24. train_enc_dc_f32c32_EqM/checkpoints/last/model_ae.safetensors +3 -0
  25. train_enc_dc_f32c32_EqM/checkpoints/last/model_ae_ema.safetensors +3 -0
  26. train_enc_dc_f32c32_EqM/checkpoints/last/model_aux_losses.safetensors +3 -0
  27. train_enc_dc_f32c32_EqM/checkpoints/last/optimizer.bin +3 -0
  28. train_enc_dc_f32c32_EqM/checkpoints/last/random_states_0.pkl +3 -0
  29. train_enc_dc_f32c32_EqM/checkpoints/last/random_states_1.pkl +3 -0
  30. train_enc_dc_f32c32_EqM/checkpoints/last/random_states_2.pkl +3 -0
  31. train_enc_dc_f32c32_EqM/checkpoints/last/random_states_3.pkl +3 -0
  32. train_enc_dc_f32c32_EqM/checkpoints/last/random_states_4.pkl +3 -0
  33. train_enc_dc_f32c32_EqM/checkpoints/last/random_states_5.pkl +3 -0
  34. train_enc_dc_f32c32_EqM/checkpoints/last/random_states_6.pkl +3 -0
  35. train_enc_dc_f32c32_EqM/checkpoints/last/random_states_7.pkl +3 -0
  36. train_enc_dc_f32c32_EqM/config.yaml +52 -0
  37. train_enc_dc_f32c32_EqM/main.log +1153 -0
  38. train_enc_dc_f32c32_EqM/tensorboard_logs/events.out.tfevents.1761477560.98629b852e50.63738.0 +3 -0
  39. train_enc_dc_f32c32_FM/.hydra/config.yaml +52 -0
  40. train_enc_dc_f32c32_FM/.hydra/hydra.yaml +172 -0
  41. train_enc_dc_f32c32_FM/.hydra/overrides.yaml +5 -0
  42. train_enc_dc_f32c32_FM/checkpoints/best/custom_checkpoint_0.pkl +3 -0
  43. train_enc_dc_f32c32_FM/checkpoints/best/model.safetensors +3 -0
  44. train_enc_dc_f32c32_FM/checkpoints/best/model_1.safetensors +3 -0
  45. train_enc_dc_f32c32_FM/checkpoints/best/model_2.safetensors +3 -0
  46. train_enc_dc_f32c32_FM/checkpoints/best/model_ae.safetensors +3 -0
  47. train_enc_dc_f32c32_FM/checkpoints/best/model_ae_ema.safetensors +3 -0
  48. train_enc_dc_f32c32_FM/checkpoints/best/model_aux_losses.safetensors +3 -0
  49. train_enc_dc_f32c32_FM/checkpoints/best/optimizer.bin +3 -0
  50. train_enc_dc_f32c32_FM/checkpoints/best/random_states_0.pkl +3 -0
train_enc_dc_f32c32_EqM/.hydra/config.yaml ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ seed: 0
2
+ task: train
3
+ runtime_path: ${hydra:runtime.cwd}
4
+ ckpt_dir: ${runtime_path}/runs
5
+ run_name: train_enc_dc_f32c32_EqM
6
+ cache_dir: ${ckpt_dir}/cache
7
+ run_dir: ${ckpt_dir}/jobs/${run_name}
8
+ checkpoint_path: ${run_dir}/checkpoints
9
+ dataset:
10
+ imagenet_root: imagenet_data
11
+ im_size: 128
12
+ batch_size: 192
13
+ aug_scale: 2
14
+ limit: null
15
+ distill_teacher: false
16
+ dc_ssdae:
17
+ compile: false
18
+ checkpoint: null
19
+ encoder: f32c32
20
+ encoder_checkpoint: null
21
+ encoder_train: true
22
+ decoder: S
23
+ trainer_type: FM
24
+ encoder_type: dc
25
+ sampler:
26
+ steps: 10
27
+ ema:
28
+ decay: 0.999
29
+ start_iter: 50000
30
+ aux_losses:
31
+ compile: ${dc_ssdae.compile}
32
+ repa:
33
+ i_extract: 4
34
+ n_layers: 2
35
+ lpips: true
36
+ training:
37
+ sdpa_kernel: 2
38
+ mixed_precision: bf16
39
+ grad_accumulate: 1
40
+ grad_clip: 0.1
41
+ epochs: 60
42
+ eval_freq: 1
43
+ save_on_best: FID
44
+ log_freq: 100
45
+ lr: 0.0003
46
+ weight_decay: 0.001
47
+ losses:
48
+ diffusion: 1
49
+ repa: 0.25
50
+ lpips: 0.5
51
+ kl: 1.0e-06
52
+ show_samples: 8
train_enc_dc_f32c32_EqM/.hydra/hydra.yaml ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ hydra:
2
+ run:
3
+ dir: ${run_dir}
4
+ sweep:
5
+ dir: ${run_dir}
6
+ subdir: multirun_${hydra:job.num}
7
+ launcher:
8
+ _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
9
+ sweeper:
10
+ _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
11
+ max_batch_size: null
12
+ params: null
13
+ help:
14
+ app_name: ${hydra.job.name}
15
+ header: '${hydra.help.app_name} is powered by Hydra.
16
+
17
+ '
18
+ footer: 'Powered by Hydra (https://hydra.cc)
19
+
20
+ Use --hydra-help to view Hydra specific help
21
+
22
+ '
23
+ template: '${hydra.help.header}
24
+
25
+ == Configuration groups ==
26
+
27
+ Compose your configuration from those groups (group=option)
28
+
29
+
30
+ $APP_CONFIG_GROUPS
31
+
32
+
33
+ == Config ==
34
+
35
+ Override anything in the config (foo.bar=value)
36
+
37
+
38
+ $CONFIG
39
+
40
+
41
+ ${hydra.help.footer}
42
+
43
+ '
44
+ hydra_help:
45
+ template: 'Hydra (${hydra.runtime.version})
46
+
47
+ See https://hydra.cc for more info.
48
+
49
+
50
+ == Flags ==
51
+
52
+ $FLAGS_HELP
53
+
54
+
55
+ == Configuration groups ==
56
+
57
+ Compose your configuration from those groups (For example, append hydra/job_logging=disabled
58
+ to command line)
59
+
60
+
61
+ $HYDRA_CONFIG_GROUPS
62
+
63
+
64
+ Use ''--cfg hydra'' to Show the Hydra config.
65
+
66
+ '
67
+ hydra_help: ???
68
+ hydra_logging:
69
+ version: 1
70
+ formatters:
71
+ simple:
72
+ format: '[%(asctime)s][HYDRA] %(message)s'
73
+ handlers:
74
+ console:
75
+ class: logging.StreamHandler
76
+ formatter: simple
77
+ stream: ext://sys.stdout
78
+ root:
79
+ level: INFO
80
+ handlers:
81
+ - console
82
+ loggers:
83
+ logging_example:
84
+ level: DEBUG
85
+ disable_existing_loggers: false
86
+ job_logging:
87
+ version: 1
88
+ formatters:
89
+ simple:
90
+ format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
91
+ colorlog:
92
+ (): colorlog.ColoredFormatter
93
+ format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s]
94
+ - %(message)s'
95
+ log_colors:
96
+ DEBUG: purple
97
+ INFO: green
98
+ WARNING: yellow
99
+ ERROR: red
100
+ CRITICAL: red
101
+ handlers:
102
+ console:
103
+ class: logging.StreamHandler
104
+ formatter: colorlog
105
+ stream: ext://sys.stdout
106
+ file:
107
+ class: logging.FileHandler
108
+ formatter: simple
109
+ filename: ${hydra:runtime.output_dir}/${hydra:job.name}.log
110
+ root:
111
+ level: INFO
112
+ handlers:
113
+ - console
114
+ - file
115
+ disable_existing_loggers: false
116
+ env: {}
117
+ mode: RUN
118
+ searchpath: []
119
+ callbacks: {}
120
+ output_subdir: .hydra
121
+ overrides:
122
+ hydra:
123
+ - hydra.mode=RUN
124
+ task:
125
+ - run_name=train_enc_dc_f32c32_EqM
126
+ - dataset.im_size=128
127
+ - dataset.aug_scale=2
128
+ - training.epochs=60
129
+ - dc_ssdae.encoder_train=true
130
+ job:
131
+ name: main
132
+ chdir: null
133
+ override_dirname: dataset.aug_scale=2,dataset.im_size=128,dc_ssdae.encoder_train=true,run_name=train_enc_dc_f32c32_EqM,training.epochs=60
134
+ id: ???
135
+ num: ???
136
+ config_name: dc_f32c32_EqM
137
+ env_set: {}
138
+ env_copy: []
139
+ config:
140
+ override_dirname:
141
+ kv_sep: '='
142
+ item_sep: ','
143
+ exclude_keys: []
144
+ runtime:
145
+ version: 1.3.2
146
+ version_base: '1.3'
147
+ cwd: /workspace/DC_SSDAE
148
+ config_sources:
149
+ - path: hydra.conf
150
+ schema: pkg
151
+ provider: hydra
152
+ - path: /workspace/DC_SSDAE/config
153
+ schema: file
154
+ provider: main
155
+ - path: hydra_plugins.hydra_colorlog.conf
156
+ schema: pkg
157
+ provider: hydra-colorlog
158
+ - path: ''
159
+ schema: structured
160
+ provider: schema
161
+ output_dir: /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM
162
+ choices:
163
+ hydra/env: default
164
+ hydra/callbacks: null
165
+ hydra/job_logging: colorlog
166
+ hydra/hydra_logging: default
167
+ hydra/hydra_help: default
168
+ hydra/help: default
169
+ hydra/sweeper: basic
170
+ hydra/launcher: basic
171
+ hydra/output: default
172
+ verbose: false
train_enc_dc_f32c32_EqM/.hydra/overrides.yaml ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ - run_name=train_enc_dc_f32c32_EqM
2
+ - dataset.im_size=128
3
+ - dataset.aug_scale=2
4
+ - training.epochs=60
5
+ - dc_ssdae.encoder_train=true
train_enc_dc_f32c32_EqM/checkpoints/best/custom_checkpoint_0.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:726176b2bceed93bdc4062c280fc8c3909ab0fa49448ac117e8c666b885644bf
3
+ size 2613
train_enc_dc_f32c32_EqM/checkpoints/best/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f97dec3f6a847ed655e06cece14b31d9a302b44e873cee0bb4e646213ef807d
3
+ size 968466492
train_enc_dc_f32c32_EqM/checkpoints/best/model_1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ac56b70c949f519090239c149624309f2547b4071420c958ea5ac77a6654aca
3
+ size 968466492
train_enc_dc_f32c32_EqM/checkpoints/best/model_2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bbab4c62906aa403b7b13d37c8ad279f930263ddb049d895cf49c6e0d26d760
3
+ size 598032
train_enc_dc_f32c32_EqM/checkpoints/best/model_ae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f97dec3f6a847ed655e06cece14b31d9a302b44e873cee0bb4e646213ef807d
3
+ size 968466492
train_enc_dc_f32c32_EqM/checkpoints/best/model_ae_ema.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ac56b70c949f519090239c149624309f2547b4071420c958ea5ac77a6654aca
3
+ size 968466492
train_enc_dc_f32c32_EqM/checkpoints/best/model_aux_losses.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bbab4c62906aa403b7b13d37c8ad279f930263ddb049d895cf49c6e0d26d760
3
+ size 598032
train_enc_dc_f32c32_EqM/checkpoints/best/optimizer.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d355d619e7076fe417e23f82105fdeffb5a5ed5c96793b6d9ecebe39f18adeb
3
+ size 1938294667
train_enc_dc_f32c32_EqM/checkpoints/best/random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4da74ebe113e8d4fabdb5ff2d4f7adb86212961727e1ef1b0e51a500f3128980
3
+ size 16449
train_enc_dc_f32c32_EqM/checkpoints/best/random_states_1.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09a638e16e35b2644ee8480e1e6018a28aa825be4236df698a206317d91ec5a5
3
+ size 16449
train_enc_dc_f32c32_EqM/checkpoints/best/random_states_2.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa728dc461456dfca7829ef28594484d6fc259c19d4ba5b3f653e2b29255bc13
3
+ size 16449
train_enc_dc_f32c32_EqM/checkpoints/best/random_states_3.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d0bc9c84ddd3eaa7aa38a00a62879a644f28a488db5044414c322d0493ec058
3
+ size 16449
train_enc_dc_f32c32_EqM/checkpoints/best/random_states_4.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68021225260db210aa55b53d8bea3c5cddff57c2a777d98f05250fbb3e220e69
3
+ size 16449
train_enc_dc_f32c32_EqM/checkpoints/best/random_states_5.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3aab7b9f9e7f8875e76f1a5f1c0c25f3329b6389239a4c806630e3cfcf22b7ed
3
+ size 16449
train_enc_dc_f32c32_EqM/checkpoints/best/random_states_6.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b8d5e9206b3a5e76a7f063d58911be1b35b7c5816f79a9f4aab1620c2c192f6
3
+ size 16449
train_enc_dc_f32c32_EqM/checkpoints/best/random_states_7.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03fc4df777e844870cd3b8520830d35d70be3b5580abc9f2db62bdb0b1f84e26
3
+ size 16449
train_enc_dc_f32c32_EqM/checkpoints/last/custom_checkpoint_0.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93e385120cf4f6bf4eb26b6dfc294a83606c14f52551cf3478273c70e1a3c8c9
3
+ size 2613
train_enc_dc_f32c32_EqM/checkpoints/last/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f97dec3f6a847ed655e06cece14b31d9a302b44e873cee0bb4e646213ef807d
3
+ size 968466492
train_enc_dc_f32c32_EqM/checkpoints/last/model_1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ac56b70c949f519090239c149624309f2547b4071420c958ea5ac77a6654aca
3
+ size 968466492
train_enc_dc_f32c32_EqM/checkpoints/last/model_2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bbab4c62906aa403b7b13d37c8ad279f930263ddb049d895cf49c6e0d26d760
3
+ size 598032
train_enc_dc_f32c32_EqM/checkpoints/last/model_ae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f97dec3f6a847ed655e06cece14b31d9a302b44e873cee0bb4e646213ef807d
3
+ size 968466492
train_enc_dc_f32c32_EqM/checkpoints/last/model_ae_ema.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ac56b70c949f519090239c149624309f2547b4071420c958ea5ac77a6654aca
3
+ size 968466492
train_enc_dc_f32c32_EqM/checkpoints/last/model_aux_losses.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bbab4c62906aa403b7b13d37c8ad279f930263ddb049d895cf49c6e0d26d760
3
+ size 598032
train_enc_dc_f32c32_EqM/checkpoints/last/optimizer.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d355d619e7076fe417e23f82105fdeffb5a5ed5c96793b6d9ecebe39f18adeb
3
+ size 1938294667
train_enc_dc_f32c32_EqM/checkpoints/last/random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4da74ebe113e8d4fabdb5ff2d4f7adb86212961727e1ef1b0e51a500f3128980
3
+ size 16449
train_enc_dc_f32c32_EqM/checkpoints/last/random_states_1.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09a638e16e35b2644ee8480e1e6018a28aa825be4236df698a206317d91ec5a5
3
+ size 16449
train_enc_dc_f32c32_EqM/checkpoints/last/random_states_2.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa728dc461456dfca7829ef28594484d6fc259c19d4ba5b3f653e2b29255bc13
3
+ size 16449
train_enc_dc_f32c32_EqM/checkpoints/last/random_states_3.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d0bc9c84ddd3eaa7aa38a00a62879a644f28a488db5044414c322d0493ec058
3
+ size 16449
train_enc_dc_f32c32_EqM/checkpoints/last/random_states_4.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68021225260db210aa55b53d8bea3c5cddff57c2a777d98f05250fbb3e220e69
3
+ size 16449
train_enc_dc_f32c32_EqM/checkpoints/last/random_states_5.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3aab7b9f9e7f8875e76f1a5f1c0c25f3329b6389239a4c806630e3cfcf22b7ed
3
+ size 16449
train_enc_dc_f32c32_EqM/checkpoints/last/random_states_6.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b8d5e9206b3a5e76a7f063d58911be1b35b7c5816f79a9f4aab1620c2c192f6
3
+ size 16449
train_enc_dc_f32c32_EqM/checkpoints/last/random_states_7.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03fc4df777e844870cd3b8520830d35d70be3b5580abc9f2db62bdb0b1f84e26
3
+ size 16449
train_enc_dc_f32c32_EqM/config.yaml ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ seed: 0
2
+ task: train
3
+ runtime_path: ${hydra:runtime.cwd}
4
+ ckpt_dir: ${runtime_path}/runs
5
+ run_name: train_enc_dc_f32c32_EqM
6
+ cache_dir: ${ckpt_dir}/cache
7
+ run_dir: ${ckpt_dir}/jobs/${run_name}
8
+ checkpoint_path: ${run_dir}/checkpoints
9
+ dataset:
10
+ imagenet_root: imagenet_data
11
+ im_size: 128
12
+ batch_size: 192
13
+ aug_scale: 2
14
+ limit: null
15
+ distill_teacher: false
16
+ dc_ssdae:
17
+ compile: false
18
+ checkpoint: null
19
+ encoder: f32c32
20
+ encoder_checkpoint: null
21
+ encoder_train: true
22
+ decoder: S
23
+ trainer_type: FM
24
+ encoder_type: dc
25
+ sampler:
26
+ steps: 10
27
+ ema:
28
+ decay: 0.999
29
+ start_iter: 50000
30
+ aux_losses:
31
+ compile: ${dc_ssdae.compile}
32
+ repa:
33
+ i_extract: 4
34
+ n_layers: 2
35
+ lpips: true
36
+ training:
37
+ sdpa_kernel: 2
38
+ mixed_precision: bf16
39
+ grad_accumulate: 1
40
+ grad_clip: 0.1
41
+ epochs: 60
42
+ eval_freq: 1
43
+ save_on_best: FID
44
+ log_freq: 100
45
+ lr: 0.0003
46
+ weight_decay: 0.001
47
+ losses:
48
+ diffusion: 1
49
+ repa: 0.25
50
+ lpips: 0.5
51
+ kl: 1.0e-06
52
+ show_samples: 8
train_enc_dc_f32c32_EqM/main.log ADDED
@@ -0,0 +1,1153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [2025-10-26 11:19:20,467][main][INFO] - Will write tensorboard logs inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/tensorboard_logs
2
+ [2025-10-26 11:19:20,470][main][INFO] - Runtime at /workspace/DC_SSDAE
3
+ [2025-10-26 11:19:20,472][main][INFO] - Running inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM
4
+ [2025-10-26 11:19:20,472][main][INFO] - Running args: ['main.py', 'run_name=train_enc_dc_f32c32_EqM', 'dataset.im_size=128', 'dataset.aug_scale=2', 'training.epochs=60', 'dc_ssdae.encoder_train=true']
5
+ [2025-10-26 11:19:20,473][main][INFO] - Command: 'main.py' 'run_name=train_enc_dc_f32c32_EqM' 'dataset.im_size=128' 'dataset.aug_scale=2' 'training.epochs=60' 'dc_ssdae.encoder_train=true'
6
+ [2025-10-26 11:19:20,473][main][INFO] - Accelerator with 8 processes, running on cuda:0
7
+ [2025-10-26 11:19:20,478][main][INFO] - Hydra configuration:
8
+ seed: 0
9
+ task: train
10
+ runtime_path: ${hydra:runtime.cwd}
11
+ ckpt_dir: ${runtime_path}/runs
12
+ run_name: train_enc_dc_f32c32_EqM
13
+ cache_dir: ${ckpt_dir}/cache
14
+ run_dir: ${ckpt_dir}/jobs/${run_name}
15
+ checkpoint_path: ${run_dir}/checkpoints
16
+ dataset:
17
+ imagenet_root: imagenet_data
18
+ im_size: 128
19
+ batch_size: 192
20
+ aug_scale: 2
21
+ limit: null
22
+ distill_teacher: false
23
+ dc_ssdae:
24
+ compile: false
25
+ checkpoint: null
26
+ encoder: f32c32
27
+ encoder_checkpoint: null
28
+ encoder_train: true
29
+ decoder: S
30
+ trainer_type: FM
31
+ encoder_type: dc
32
+ sampler:
33
+ steps: 10
34
+ ema:
35
+ decay: 0.999
36
+ start_iter: 50000
37
+ aux_losses:
38
+ compile: ${dc_ssdae.compile}
39
+ repa:
40
+ i_extract: 4
41
+ n_layers: 2
42
+ lpips: true
43
+ training:
44
+ sdpa_kernel: 2
45
+ mixed_precision: bf16
46
+ grad_accumulate: 1
47
+ grad_clip: 0.1
48
+ epochs: 60
49
+ eval_freq: 1
50
+ save_on_best: FID
51
+ log_freq: 100
52
+ lr: 0.0003
53
+ weight_decay: 0.001
54
+ losses:
55
+ diffusion: 1
56
+ repa: 0.25
57
+ lpips: 0.5
58
+ kl: 1.0e-06
59
+ show_samples: 8
60
+
61
+
62
+
63
+ [2025-10-26 11:19:33,933][main][INFO] - Loaded ImageNet dataset: {'train': Dataset ImageNet
64
+ Number of datapoints: 1279867
65
+ Root location: ../../../imagenet_data
66
+ Split: train
67
+ StandardTransform
68
+ Transform: Compose(
69
+ RandomResize(min_size=128, max_size=256, interpolation=InterpolationMode.LANCZOS, antialias=True)
70
+ RandomCrop(size=(128, 128), pad_if_needed=False, fill=0, padding_mode=constant)
71
+ RandomHorizontalFlip(p=0.5)
72
+ ToImage()
73
+ ToDtype(scale=True)
74
+ Normalize(mean=[0.5], std=[0.5], inplace=False)
75
+ ), 'test': Dataset ImageNet
76
+ Number of datapoints: 49950
77
+ Root location: ../../../imagenet_data
78
+ Split: validation
79
+ StandardTransform
80
+ Transform: Compose(
81
+ Resize(size=[128], interpolation=InterpolationMode.BILINEAR, antialias=True)
82
+ CenterCrop(size=(128, 128))
83
+ ToImage()
84
+ ToDtype(scale=True)
85
+ Normalize(mean=[0.5], std=[0.5], inplace=False)
86
+ )}
87
+ [2025-10-26 11:19:49,801][main][INFO] - ae parameters count:
88
+ [2025-10-26 11:19:49,807][main][INFO] - Total: #230.9M (trainable: #230.9M)
89
+ [2025-10-26 11:19:49,808][main][INFO] - - encoder: #217.4M (trainable: #217.4M)
90
+ [2025-10-26 11:19:49,809][main][INFO] - - project_in: #1.8K (trainable: #1.8K)
91
+ [2025-10-26 11:19:49,810][main][INFO] - - stages: #216.9M (trainable: #216.9M)
92
+ [2025-10-26 11:19:49,811][main][INFO] - - project_out: #576.1K (trainable: #576.1K)
93
+ [2025-10-26 11:19:49,813][main][INFO] - - decoder: #13.5M (trainable: #13.5M)
94
+ [2025-10-26 11:19:49,813][main][INFO] - - conv_in_img: #896 (trainable: #896)
95
+ [2025-10-26 11:19:49,814][main][INFO] - - conv_in_z: #9.0K (trainable: #9.0K)
96
+ [2025-10-26 11:19:49,814][main][INFO] - - conv_in: #36.1K (trainable: #36.1K)
97
+ [2025-10-26 11:19:49,815][main][INFO] - - batch_norm_z: #64 (trainable: #64)
98
+ [2025-10-26 11:19:49,815][main][INFO] - - time_proj: #0 (trainable: #0)
99
+ [2025-10-26 11:19:49,817][main][INFO] - - time_embedding: #80.5K (trainable: #80.5K)
100
+ [2025-10-26 11:19:49,818][main][INFO] - - ada_ctx_proj: #54.1K (trainable: #54.1K)
101
+ [2025-10-26 11:19:49,819][main][INFO] - - down_blocks: #3.0M (trainable: #3.0M)
102
+ [2025-10-26 11:19:49,820][main][INFO] - - mid_block: #3.4M (trainable: #3.4M)
103
+ [2025-10-26 11:19:49,820][main][INFO] - - up_blocks: #6.9M (trainable: #6.9M)
104
+ [2025-10-26 11:19:49,821][main][INFO] - - conv_norm_out: #128 (trainable: #128)
105
+ [2025-10-26 11:19:49,821][main][INFO] - - conv_out_act: #0 (trainable: #0)
106
+ [2025-10-26 11:19:49,822][main][INFO] - - conv_out: #1.7K (trainable: #1.7K)
107
+ [2025-10-26 11:19:49,825][main][INFO] - ae: EMAWrapper(
108
+ (model): DistributedDataParallel(
109
+ (module): DC_SSDAE(
110
+ (encoder): DCEncoder(
111
+ (project_in): ConvPixelUnshuffleDownSampleLayer(
112
+ (conv): ConvLayer(
113
+ (conv): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
114
+ )
115
+ )
116
+ (stages): ModuleList(
117
+ (0): OpSequential(
118
+ (op_list): ModuleList()
119
+ )
120
+ (1): OpSequential(
121
+ (op_list): ModuleList(
122
+ (0-4): 5 x ResidualBlock(
123
+ (main): ResBlock(
124
+ (conv1): ConvLayer(
125
+ (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
126
+ (act): SiLU()
127
+ )
128
+ (conv2): ConvLayer(
129
+ (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
130
+ )
131
+ )
132
+ (shortcut): IdentityLayer()
133
+ )
134
+ (5): ResidualBlock(
135
+ (main): ConvPixelUnshuffleDownSampleLayer(
136
+ (conv): ConvLayer(
137
+ (conv): Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
138
+ )
139
+ )
140
+ (shortcut): PixelUnshuffleChannelAveragingDownSampleLayer()
141
+ )
142
+ )
143
+ )
144
+ (2): OpSequential(
145
+ (op_list): ModuleList(
146
+ (0-9): 10 x ResidualBlock(
147
+ (main): ResBlock(
148
+ (conv1): ConvLayer(
149
+ (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
150
+ (act): SiLU()
151
+ )
152
+ (conv2): ConvLayer(
153
+ (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
154
+ )
155
+ )
156
+ (shortcut): IdentityLayer()
157
+ )
158
+ (10): ResidualBlock(
159
+ (main): ConvPixelUnshuffleDownSampleLayer(
160
+ (conv): ConvLayer(
161
+ (conv): Conv2d(512, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
162
+ )
163
+ )
164
+ (shortcut): PixelUnshuffleChannelAveragingDownSampleLayer()
165
+ )
166
+ )
167
+ )
168
+ (3): OpSequential(
169
+ (op_list): ModuleList(
170
+ (0-3): 4 x ResidualBlock(
171
+ (main): ResBlock(
172
+ (conv1): ConvLayer(
173
+ (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
174
+ (act): SiLU()
175
+ )
176
+ (conv2): ConvLayer(
177
+ (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
178
+ )
179
+ )
180
+ (shortcut): IdentityLayer()
181
+ )
182
+ (4): ResidualBlock(
183
+ (main): ConvPixelUnshuffleDownSampleLayer(
184
+ (conv): ConvLayer(
185
+ (conv): Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
186
+ )
187
+ )
188
+ (shortcut): PixelUnshuffleChannelAveragingDownSampleLayer()
189
+ )
190
+ )
191
+ )
192
+ (4): OpSequential(
193
+ (op_list): ModuleList(
194
+ (0-3): 4 x ResidualBlock(
195
+ (main): ResBlock(
196
+ (conv1): ConvLayer(
197
+ (conv): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
198
+ (act): SiLU()
199
+ )
200
+ (conv2): ConvLayer(
201
+ (conv): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
202
+ )
203
+ )
204
+ (shortcut): IdentityLayer()
205
+ )
206
+ (4): ResidualBlock(
207
+ (main): ConvPixelUnshuffleDownSampleLayer(
208
+ (conv): ConvLayer(
209
+ (conv): Conv2d(1024, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
210
+ )
211
+ )
212
+ (shortcut): PixelUnshuffleChannelAveragingDownSampleLayer()
213
+ )
214
+ )
215
+ )
216
+ (5): OpSequential(
217
+ (op_list): ModuleList(
218
+ (0-3): 4 x ResidualBlock(
219
+ (main): ResBlock(
220
+ (conv1): ConvLayer(
221
+ (conv): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
222
+ (act): SiLU()
223
+ )
224
+ (conv2): ConvLayer(
225
+ (conv): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
226
+ )
227
+ )
228
+ (shortcut): IdentityLayer()
229
+ )
230
+ )
231
+ )
232
+ )
233
+ (project_out): OpSequential(
234
+ (op_list): ModuleList(
235
+ (0): ConvLayer(
236
+ (conv): Conv2d(1024, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
237
+ )
238
+ )
239
+ )
240
+ )
241
+ (decoder): UViTDecoder(
242
+ (conv_in_img): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
243
+ (conv_in_z): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
244
+ (conv_in): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
245
+ (batch_norm_z): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
246
+ (time_proj): Timesteps()
247
+ (time_embedding): TimestepEmbedding(
248
+ (linear_1): Linear(in_features=64, out_features=256, bias=True)
249
+ (act): SiLU()
250
+ (linear_2): Linear(in_features=256, out_features=256, bias=True)
251
+ )
252
+ (ada_ctx_proj): Sequential(
253
+ (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
254
+ (1): SiLU()
255
+ (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
256
+ )
257
+ (down_blocks): ModuleList(
258
+ (0): DownBlock2D(
259
+ (resnets): ModuleList(
260
+ (0-1): 2 x ResnetBlock2D(
261
+ (norm1): AdaGroupNorm2D(
262
+ (ctx_proj): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1))
263
+ )
264
+ (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
265
+ (time_emb_proj): Linear(in_features=256, out_features=128, bias=True)
266
+ (norm2): GroupNorm(32, 64, eps=1e-05, affine=True)
267
+ (dropout): Dropout(p=0.0, inplace=False)
268
+ (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
269
+ (nonlinearity): SiLU()
270
+ )
271
+ )
272
+ (downsamplers): ModuleList(
273
+ (0): Downsample2D(
274
+ (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
275
+ )
276
+ )
277
+ )
278
+ (1): DownBlock2D(
279
+ (resnets): ModuleList(
280
+ (0): ResnetBlock2D(
281
+ (norm1): AdaGroupNorm2D(
282
+ (ctx_proj): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1))
283
+ )
284
+ (conv1): Conv2d(64, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
285
+ (time_emb_proj): Linear(in_features=256, out_features=192, bias=True)
286
+ (norm2): GroupNorm(32, 96, eps=1e-05, affine=True)
287
+ (dropout): Dropout(p=0.0, inplace=False)
288
+ (conv2): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
289
+ (nonlinearity): SiLU()
290
+ (conv_shortcut): Conv2d(64, 96, kernel_size=(1, 1), stride=(1, 1))
291
+ )
292
+ (1): ResnetBlock2D(
293
+ (norm1): AdaGroupNorm2D(
294
+ (ctx_proj): Conv2d(64, 192, kernel_size=(1, 1), stride=(1, 1))
295
+ )
296
+ (conv1): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
297
+ (time_emb_proj): Linear(in_features=256, out_features=192, bias=True)
298
+ (norm2): GroupNorm(32, 96, eps=1e-05, affine=True)
299
+ (dropout): Dropout(p=0.0, inplace=False)
300
+ (conv2): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
301
+ (nonlinearity): SiLU()
302
+ )
303
+ )
304
+ (downsamplers): ModuleList(
305
+ (0): Downsample2D(
306
+ (conv): Conv2d(96, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
307
+ )
308
+ )
309
+ )
310
+ (2): DownBlock2D(
311
+ (resnets): ModuleList(
312
+ (0): ResnetBlock2D(
313
+ (norm1): AdaGroupNorm2D(
314
+ (ctx_proj): Conv2d(64, 192, kernel_size=(1, 1), stride=(1, 1))
315
+ )
316
+ (conv1): Conv2d(96, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
317
+ (time_emb_proj): Linear(in_features=256, out_features=320, bias=True)
318
+ (norm2): GroupNorm(32, 160, eps=1e-05, affine=True)
319
+ (dropout): Dropout(p=0.0, inplace=False)
320
+ (conv2): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
321
+ (nonlinearity): SiLU()
322
+ (conv_shortcut): Conv2d(96, 160, kernel_size=(1, 1), stride=(1, 1))
323
+ )
324
+ (1): ResnetBlock2D(
325
+ (norm1): AdaGroupNorm2D(
326
+ (ctx_proj): Conv2d(64, 320, kernel_size=(1, 1), stride=(1, 1))
327
+ )
328
+ (conv1): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
329
+ (time_emb_proj): Linear(in_features=256, out_features=320, bias=True)
330
+ (norm2): GroupNorm(32, 160, eps=1e-05, affine=True)
331
+ (dropout): Dropout(p=0.0, inplace=False)
332
+ (conv2): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
333
+ (nonlinearity): SiLU()
334
+ )
335
+ )
336
+ (downsamplers): ModuleList(
337
+ (0): Downsample2D(
338
+ (conv): Conv2d(160, 160, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
339
+ )
340
+ )
341
+ )
342
+ (3): DownBlock2D(
343
+ (resnets): ModuleList(
344
+ (0-1): 2 x ResnetBlock2D(
345
+ (norm1): AdaGroupNorm2D(
346
+ (ctx_proj): Conv2d(64, 320, kernel_size=(1, 1), stride=(1, 1))
347
+ )
348
+ (conv1): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
349
+ (time_emb_proj): Linear(in_features=256, out_features=320, bias=True)
350
+ (norm2): GroupNorm(32, 160, eps=1e-05, affine=True)
351
+ (dropout): Dropout(p=0.0, inplace=False)
352
+ (conv2): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
353
+ (nonlinearity): SiLU()
354
+ )
355
+ )
356
+ )
357
+ )
358
+ (mid_block): UViTMiddleTransformer(
359
+ (proj_in): Linear(in_features=160, out_features=160, bias=True)
360
+ (transformer_blocks): ModuleList(
361
+ (0-7): 8 x TransformerBlock(
362
+ (norm1): AdaLayerNorm(
363
+ (silu): SiLU()
364
+ (linear): Linear(in_features=64, out_features=320, bias=True)
365
+ (norm): LayerNorm((160,), eps=1e-05, elementwise_affine=False)
366
+ )
367
+ (attn1): Attention(
368
+ (to_q): Linear(in_features=160, out_features=160, bias=False)
369
+ (to_k): Linear(in_features=160, out_features=160, bias=False)
370
+ (to_v): Linear(in_features=160, out_features=160, bias=False)
371
+ (out_proj): Linear(in_features=160, out_features=160, bias=True)
372
+ (out_drop): Dropout(p=0.0, inplace=False)
373
+ )
374
+ (norm2): LayerNorm((160,), eps=1e-05, elementwise_affine=True)
375
+ (ff): FeedForward(
376
+ (proj_in_act): GEGLU(
377
+ (proj): Linear(in_features=160, out_features=1280, bias=True)
378
+ )
379
+ (drop): Dropout(p=0.0, inplace=False)
380
+ (proj_out): Linear(in_features=640, out_features=160, bias=True)
381
+ )
382
+ (relative_position_bias): RelativePositionBias()
383
+ )
384
+ )
385
+ (proj_out): Linear(in_features=160, out_features=160, bias=True)
386
+ (norm): GroupNorm(32, 160, eps=1e-06, affine=True)
387
+ )
388
+ (up_blocks): ModuleList(
389
+ (0): UpBlock2D(
390
+ (resnets): ModuleList(
391
+ (0-2): 3 x ResnetBlock2D(
392
+ (norm1): AdaGroupNorm2D(
393
+ (ctx_proj): Conv2d(64, 640, kernel_size=(1, 1), stride=(1, 1))
394
+ )
395
+ (conv1): Conv2d(320, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
396
+ (time_emb_proj): Linear(in_features=256, out_features=320, bias=True)
397
+ (norm2): GroupNorm(32, 160, eps=1e-05, affine=True)
398
+ (dropout): Dropout(p=0.0, inplace=False)
399
+ (conv2): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
400
+ (nonlinearity): SiLU()
401
+ (conv_shortcut): Conv2d(320, 160, kernel_size=(1, 1), stride=(1, 1))
402
+ )
403
+ )
404
+ (upsamplers): ModuleList(
405
+ (0): Upsample2D(
406
+ (conv): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
407
+ )
408
+ )
409
+ )
410
+ (1): UpBlock2D(
411
+ (resnets): ModuleList(
412
+ (0-1): 2 x ResnetBlock2D(
413
+ (norm1): AdaGroupNorm2D(
414
+ (ctx_proj): Conv2d(64, 640, kernel_size=(1, 1), stride=(1, 1))
415
+ )
416
+ (conv1): Conv2d(320, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
417
+ (time_emb_proj): Linear(in_features=256, out_features=320, bias=True)
418
+ (norm2): GroupNorm(32, 160, eps=1e-05, affine=True)
419
+ (dropout): Dropout(p=0.0, inplace=False)
420
+ (conv2): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
421
+ (nonlinearity): SiLU()
422
+ (conv_shortcut): Conv2d(320, 160, kernel_size=(1, 1), stride=(1, 1))
423
+ )
424
+ (2): ResnetBlock2D(
425
+ (norm1): AdaGroupNorm2D(
426
+ (ctx_proj): Conv2d(64, 512, kernel_size=(1, 1), stride=(1, 1))
427
+ )
428
+ (conv1): Conv2d(256, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
429
+ (time_emb_proj): Linear(in_features=256, out_features=320, bias=True)
430
+ (norm2): GroupNorm(32, 160, eps=1e-05, affine=True)
431
+ (dropout): Dropout(p=0.0, inplace=False)
432
+ (conv2): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
433
+ (nonlinearity): SiLU()
434
+ (conv_shortcut): Conv2d(256, 160, kernel_size=(1, 1), stride=(1, 1))
435
+ )
436
+ )
437
+ (upsamplers): ModuleList(
438
+ (0): Upsample2D(
439
+ (conv): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
440
+ )
441
+ )
442
+ )
443
+ (2): UpBlock2D(
444
+ (resnets): ModuleList(
445
+ (0): ResnetBlock2D(
446
+ (norm1): AdaGroupNorm2D(
447
+ (ctx_proj): Conv2d(64, 512, kernel_size=(1, 1), stride=(1, 1))
448
+ )
449
+ (conv1): Conv2d(256, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
450
+ (time_emb_proj): Linear(in_features=256, out_features=192, bias=True)
451
+ (norm2): GroupNorm(32, 96, eps=1e-05, affine=True)
452
+ (dropout): Dropout(p=0.0, inplace=False)
453
+ (conv2): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
454
+ (nonlinearity): SiLU()
455
+ (conv_shortcut): Conv2d(256, 96, kernel_size=(1, 1), stride=(1, 1))
456
+ )
457
+ (1): ResnetBlock2D(
458
+ (norm1): AdaGroupNorm2D(
459
+ (ctx_proj): Conv2d(64, 384, kernel_size=(1, 1), stride=(1, 1))
460
+ )
461
+ (conv1): Conv2d(192, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
462
+ (time_emb_proj): Linear(in_features=256, out_features=192, bias=True)
463
+ (norm2): GroupNorm(32, 96, eps=1e-05, affine=True)
464
+ (dropout): Dropout(p=0.0, inplace=False)
465
+ (conv2): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
466
+ (nonlinearity): SiLU()
467
+ (conv_shortcut): Conv2d(192, 96, kernel_size=(1, 1), stride=(1, 1))
468
+ )
469
+ (2): ResnetBlock2D(
470
+ (norm1): AdaGroupNorm2D(
471
+ (ctx_proj): Conv2d(64, 320, kernel_size=(1, 1), stride=(1, 1))
472
+ )
473
+ (conv1): Conv2d(160, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
474
+ (time_emb_proj): Linear(in_features=256, out_features=192, bias=True)
475
+ (norm2): GroupNorm(32, 96, eps=1e-05, affine=True)
476
+ (dropout): Dropout(p=0.0, inplace=False)
477
+ (conv2): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
478
+ (nonlinearity): SiLU()
479
+ (conv_shortcut): Conv2d(160, 96, kernel_size=(1, 1), stride=(1, 1))
480
+ )
481
+ )
482
+ (upsamplers): ModuleList(
483
+ (0): Upsample2D(
484
+ (conv): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
485
+ )
486
+ )
487
+ )
488
+ (3): UpBlock2D(
489
+ (resnets): ModuleList(
490
+ (0): ResnetBlock2D(
491
+ (norm1): AdaGroupNorm2D(
492
+ (ctx_proj): Conv2d(64, 320, kernel_size=(1, 1), stride=(1, 1))
493
+ )
494
+ (conv1): Conv2d(160, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
495
+ (time_emb_proj): Linear(in_features=256, out_features=128, bias=True)
496
+ (norm2): GroupNorm(32, 64, eps=1e-05, affine=True)
497
+ (dropout): Dropout(p=0.0, inplace=False)
498
+ (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
499
+ (nonlinearity): SiLU()
500
+ (conv_shortcut): Conv2d(160, 64, kernel_size=(1, 1), stride=(1, 1))
501
+ )
502
+ (1-2): 2 x ResnetBlock2D(
503
+ (norm1): AdaGroupNorm2D(
504
+ (ctx_proj): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
505
+ )
506
+ (conv1): Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
507
+ (time_emb_proj): Linear(in_features=256, out_features=128, bias=True)
508
+ (norm2): GroupNorm(32, 64, eps=1e-05, affine=True)
509
+ (dropout): Dropout(p=0.0, inplace=False)
510
+ (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
511
+ (nonlinearity): SiLU()
512
+ (conv_shortcut): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1))
513
+ )
514
+ )
515
+ )
516
+ )
517
+ (conv_norm_out): GroupNorm(32, 64, eps=1e-05, affine=True)
518
+ (conv_out_act): SiLU()
519
+ (conv_out): Conv2d(64, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
520
+ )
521
+ )
522
+ )
523
+ (ema): EMA(ema_model=DC_SSDAE, decay=0.999, start_iter=50000)
524
+ )
525
+ [2025-10-26 11:19:49,825][main][INFO] - aux_losses parameters count:
526
+ [2025-10-26 11:19:49,826][main][INFO] - Total: #96.7M (trainable: #145.9K)
527
+ [2025-10-26 11:19:49,827][main][INFO] - - repa_loss: #82.7M (trainable: #145.9K)
528
+ [2025-10-26 11:19:49,828][main][INFO] - - lpips_loss: #14.0M (trainable: #0)
529
+ [2025-10-26 11:19:49,828][main][INFO] - aux_losses: DistributedDataParallel(
530
+ (module): SSDDLosses(
531
+ (repa_loss): REPALoss(
532
+ (features_extractor): Frozen(DinoEncoder/Dinov2Model)
533
+ (repa_mlp): Sequential(
534
+ (0): Linear(in_features=160, out_features=160, bias=True)
535
+ (1): SiLU()
536
+ (2): Linear(in_features=160, out_features=768, bias=True)
537
+ )
538
+ (repa_loss): CosineSimilarity()
539
+ )
540
+ (lpips_loss): Frozen(LPIPS)
541
+ )
542
+ )
543
+ [2025-10-26 11:19:49,833][main][INFO] - Optimizer for autoencoder: RAdamScheduleFree (
544
+ Parameter Group 0
545
+ betas: (0.9, 0.999)
546
+ eps: 1e-08
547
+ foreach: True
548
+ k: 0
549
+ lr: 0.0003
550
+ lr_max: -1.0
551
+ r: 0.0
552
+ scheduled_lr: 0.0
553
+ silent_sgd_phase: True
554
+ train_mode: False
555
+ weight_decay: 0.001
556
+ weight_lr_power: 2.0
557
+ weight_sum: 0.0
558
+
559
+ Parameter Group 1
560
+ betas: (0.9, 0.999)
561
+ eps: 1e-08
562
+ foreach: True
563
+ k: 0
564
+ lr: 0.0003
565
+ lr_max: -1.0
566
+ r: 0.0
567
+ scheduled_lr: 0.0
568
+ silent_sgd_phase: True
569
+ train_mode: False
570
+ weight_decay: 0.0
571
+ weight_lr_power: 2.0
572
+ weight_sum: 0.0
573
+ )
574
+ [2025-10-26 11:19:49,843][main][INFO] - No training state found to resume from None
575
+ [2025-10-26 11:19:49,844][main][INFO] - ====================== RUNNING TASK train
576
+ [2025-10-26 11:19:49,844][main][INFO] - Starting training
577
+ [2025-10-26 11:19:49,845][main][INFO] - Batch size of 192 (24 per GPU, 1 acumulation step(s) 8 process(es))
578
+ [2025-10-26 11:19:49,853][main][INFO] - ---
579
+
580
+
581
+ [2025-10-26 11:19:49,854][main][INFO] - [T_total=00:00:29 | T_train=00:00:00] Start epoch 0
582
+ [2025-10-26 14:25:01,522][main][INFO] - [T_total=03:05:41 | T_train=03:05:11 | T_epoch=03:05:11] End of epoch 0 (6666 steps) train loss 67151
583
+ [2025-10-26 14:25:01,524][main][INFO] - [Epoch 0] All losses: [[diffusion=0.124278 ; kl=6.71505e+10 ; lpips=0.360362 ; repa=0.667823]]
584
+ [2025-10-26 14:28:30,738][main][INFO] - [Epoch 1] Test metrics: [[MSE=47.45 | MAE=0.161 | LPIPS=0.4364 | PSNR=13.24 | SSIM=0.2403 | dreamsim=0.6167 | FID=113.3]]
585
+ [2025-10-26 14:28:30,740][main][INFO] - [Epoch 1] Best metrics: [[min_MSE=47.45 | min_MAE=0.161 | min_LPIPS=0.4364 | max_PSNR=13.24 | max_SSIM=0.2403 | min_dreamsim=0.6167 | min_FID=113.3]]
586
+ [2025-10-26 14:28:30,741][main][DEBUG] - Writing images to disk...
587
+ [2025-10-26 14:28:31,622][main][DEBUG] - Image(s) saved on disk
588
+ [2025-10-26 14:28:31,831][main][INFO] - End of epoch timers: [T_train=03:05:11 | T_epoch=03:05:11 | T_eval=00:03:30 | T_total=03:09:11]
589
+ [2025-10-26 14:28:31,832][main][INFO] - Storing model checkpoint inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/last
590
+ [2025-10-26 14:28:43,727][main][INFO] - Best FID so far, storing a copy of the model checkpoint to /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/best
591
+ [2025-10-26 14:28:54,887][main][INFO] - ---
592
+
593
+
594
+ [2025-10-26 14:28:54,888][main][INFO] - [T_total=03:09:34 | T_train=03:05:11] Start epoch 1
595
+ [2025-10-26 17:33:46,084][main][INFO] - [T_total=06:14:25 | T_train=06:10:02 | T_epoch=03:04:51] End of epoch 1 (13332 steps) train loss 4110.26
596
+ [2025-10-26 17:33:46,086][main][INFO] - [Epoch 1] All losses: [[diffusion=0.0919295 ; kl=4.10988e+09 ; lpips=0.275692 ; repa=0.588433]]
597
+ [2025-10-26 17:37:12,979][main][INFO] - [Epoch 2] Test metrics: [[MSE=46.7 | MAE=0.1611 | LPIPS=0.3256 | PSNR=13.31 | SSIM=0.2891 | dreamsim=0.496 | FID=78.54]]
598
+ [2025-10-26 17:37:12,981][main][INFO] - [Epoch 2] Best metrics: [[min_MSE=46.7 | min_MAE=0.161 | min_LPIPS=0.3256 | max_PSNR=13.31 | max_SSIM=0.2891 | min_dreamsim=0.496 | min_FID=78.54]]
599
+ [2025-10-26 17:37:12,982][main][DEBUG] - Writing images to disk...
600
+ [2025-10-26 17:37:13,796][main][DEBUG] - Image(s) saved on disk
601
+ [2025-10-26 17:37:14,011][main][INFO] - End of epoch timers: [T_train=06:10:02 | T_epoch=03:04:51 | T_eval=00:06:58 | T_total=06:17:53]
602
+ [2025-10-26 17:37:14,012][main][INFO] - Storing model checkpoint inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/last
603
+ [2025-10-26 17:37:25,273][main][INFO] - Best FID so far, storing a copy of the model checkpoint to /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/best
604
+ [2025-10-26 17:37:35,581][main][INFO] - ---
605
+
606
+
607
+ [2025-10-26 17:37:35,582][main][INFO] - [T_total=06:18:15 | T_train=06:10:02] Start epoch 2
608
+ [2025-10-26 20:42:34,608][main][INFO] - [T_total=09:23:14 | T_train=09:15:01 | T_epoch=03:04:59] End of epoch 2 (19998 steps) train loss 1112.41
609
+ [2025-10-26 20:42:34,609][main][INFO] - [Epoch 2] All losses: [[diffusion=0.0875515 ; kl=1.11206e+09 ; lpips=0.238805 ; repa=0.559219]]
610
+ [2025-10-26 20:46:02,005][main][INFO] - [Epoch 3] Test metrics: [[MSE=39.34 | MAE=0.1462 | LPIPS=0.2609 | PSNR=14.05 | SSIM=0.3195 | dreamsim=0.4047 | FID=56.04]]
611
+ [2025-10-26 20:46:02,007][main][INFO] - [Epoch 3] Best metrics: [[min_MSE=39.34 | min_MAE=0.1462 | min_LPIPS=0.2609 | max_PSNR=14.05 | max_SSIM=0.3195 | min_dreamsim=0.4047 | min_FID=56.04]]
612
+ [2025-10-26 20:46:02,007][main][DEBUG] - Writing images to disk...
613
+ [2025-10-26 20:46:02,818][main][DEBUG] - Image(s) saved on disk
614
+ [2025-10-26 20:46:03,028][main][INFO] - End of epoch timers: [T_train=09:15:01 | T_epoch=03:04:59 | T_eval=00:10:26 | T_total=09:26:42]
615
+ [2025-10-26 20:46:03,029][main][INFO] - Storing model checkpoint inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/last
616
+ [2025-10-26 20:46:14,286][main][INFO] - Best FID so far, storing a copy of the model checkpoint to /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/best
617
+ [2025-10-26 20:46:24,572][main][INFO] - ---
618
+
619
+
620
+ [2025-10-26 20:46:24,573][main][INFO] - [T_total=09:27:04 | T_train=09:15:01] Start epoch 3
621
+ [2025-10-26 23:51:05,689][main][INFO] - [T_total=12:31:45 | T_train=12:19:43 | T_epoch=03:04:41] End of epoch 3 (26664 steps) train loss 5.02755
622
+ [2025-10-26 23:51:05,690][main][INFO] - [Epoch 3] All losses: [[diffusion=0.0849642 ; kl=4.69653e+06 ; lpips=0.22171 ; repa=0.540818]]
623
+ [2025-10-26 23:54:33,185][main][INFO] - [Epoch 4] Test metrics: [[MSE=35.97 | MAE=0.1387 | LPIPS=0.2313 | PSNR=14.44 | SSIM=0.3346 | dreamsim=0.3568 | FID=45.03]]
624
+ [2025-10-26 23:54:33,187][main][INFO] - [Epoch 4] Best metrics: [[min_MSE=35.97 | min_MAE=0.1387 | min_LPIPS=0.2313 | max_PSNR=14.44 | max_SSIM=0.3346 | min_dreamsim=0.3568 | min_FID=45.03]]
625
+ [2025-10-26 23:54:33,188][main][DEBUG] - Writing images to disk...
626
+ [2025-10-26 23:54:34,013][main][DEBUG] - Image(s) saved on disk
627
+ [2025-10-26 23:54:34,260][main][INFO] - End of epoch timers: [T_train=12:19:43 | T_epoch=03:04:41 | T_eval=00:13:54 | T_total=12:35:13]
628
+ [2025-10-26 23:54:34,261][main][INFO] - Storing model checkpoint inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/last
629
+ [2025-10-26 23:54:45,885][main][INFO] - Best FID so far, storing a copy of the model checkpoint to /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/best
630
+ [2025-10-26 23:54:56,831][main][INFO] - ---
631
+
632
+
633
+ [2025-10-26 23:54:56,832][main][INFO] - [T_total=12:35:36 | T_train=12:19:43] Start epoch 4
634
+ [2025-10-27 03:00:25,624][main][INFO] - [T_total=15:41:05 | T_train=15:25:11 | T_epoch=03:05:28] End of epoch 4 (33330 steps) train loss 166.439
635
+ [2025-10-27 03:00:25,626][main][INFO] - [Epoch 4] All losses: [[diffusion=0.0838747 ; kl=1.66118e+08 ; lpips=0.211539 ; repa=0.528085]]
636
+ [2025-10-27 03:03:52,781][main][INFO] - [Epoch 5] Test metrics: [[MSE=31.78 | MAE=0.129 | LPIPS=0.2131 | PSNR=14.98 | SSIM=0.3511 | dreamsim=0.3263 | FID=38.77]]
637
+ [2025-10-27 03:03:52,782][main][INFO] - [Epoch 5] Best metrics: [[min_MSE=31.78 | min_MAE=0.129 | min_LPIPS=0.2131 | max_PSNR=14.98 | max_SSIM=0.3511 | min_dreamsim=0.3263 | min_FID=38.77]]
638
+ [2025-10-27 03:03:52,783][main][DEBUG] - Writing images to disk...
639
+ [2025-10-27 03:03:53,617][main][DEBUG] - Image(s) saved on disk
640
+ [2025-10-27 03:03:53,821][main][INFO] - End of epoch timers: [T_train=15:25:11 | T_epoch=03:05:28 | T_eval=00:17:22 | T_total=15:44:33]
641
+ [2025-10-27 03:03:53,821][main][INFO] - Storing model checkpoint inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/last
642
+ [2025-10-27 03:04:05,149][main][INFO] - Best FID so far, storing a copy of the model checkpoint to /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/best
643
+ [2025-10-27 03:04:14,375][main][INFO] - ---
644
+
645
+
646
+ [2025-10-27 03:04:14,376][main][INFO] - [T_total=15:44:53 | T_train=15:25:11] Start epoch 5
647
+ [2025-10-27 06:10:26,456][main][INFO] - [T_total=18:51:05 | T_train=18:31:23 | T_epoch=03:06:12] End of epoch 5 (39996 steps) train loss 39272
648
+ [2025-10-27 06:10:26,457][main][INFO] - [Epoch 5] All losses: [[diffusion=0.0828694 ; kl=3.92717e+10 ; lpips=0.205352 ; repa=0.518688]]
649
+ [2025-10-27 06:13:53,521][main][INFO] - [Epoch 6] Test metrics: [[MSE=29.02 | MAE=0.1223 | LPIPS=0.2011 | PSNR=15.37 | SSIM=0.3643 | dreamsim=0.3048 | FID=34.65]]
650
+ [2025-10-27 06:13:53,524][main][INFO] - [Epoch 6] Best metrics: [[min_MSE=29.02 | min_MAE=0.1223 | min_LPIPS=0.2011 | max_PSNR=15.37 | max_SSIM=0.3643 | min_dreamsim=0.3048 | min_FID=34.65]]
651
+ [2025-10-27 06:13:53,525][main][DEBUG] - Writing images to disk...
652
+ [2025-10-27 06:13:54,357][main][DEBUG] - Image(s) saved on disk
653
+ [2025-10-27 06:13:54,564][main][INFO] - End of epoch timers: [T_train=18:31:23 | T_epoch=03:06:12 | T_eval=00:20:50 | T_total=18:54:34]
654
+ [2025-10-27 06:13:54,565][main][INFO] - Storing model checkpoint inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/last
655
+ [2025-10-27 06:14:09,698][main][INFO] - Best FID so far, storing a copy of the model checkpoint to /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/best
656
+ [2025-10-27 06:14:20,685][main][INFO] - ---
657
+
658
+
659
+ [2025-10-27 06:14:20,686][main][INFO] - [T_total=18:55:00 | T_train=18:31:23] Start epoch 6
660
+ [2025-10-27 09:20:11,071][main][INFO] - [T_total=22:00:50 | T_train=21:37:14 | T_epoch=03:05:50] End of epoch 6 (46662 steps) train loss 38.9426
661
+ [2025-10-27 09:20:11,072][main][INFO] - [Epoch 6] All losses: [[diffusion=0.0819753 ; kl=3.86332e+07 ; lpips=0.199326 ; repa=0.510736]]
662
+ [2025-10-27 09:23:38,395][main][INFO] - [Epoch 7] Test metrics: [[MSE=27.01 | MAE=0.1173 | LPIPS=0.191 | PSNR=15.68 | SSIM=0.3792 | dreamsim=0.2861 | FID=30.48]]
663
+ [2025-10-27 09:23:38,397][main][INFO] - [Epoch 7] Best metrics: [[min_MSE=27.01 | min_MAE=0.1173 | min_LPIPS=0.191 | max_PSNR=15.68 | max_SSIM=0.3792 | min_dreamsim=0.2861 | min_FID=30.48]]
664
+ [2025-10-27 09:23:38,398][main][DEBUG] - Writing images to disk...
665
+ [2025-10-27 09:23:39,236][main][DEBUG] - Image(s) saved on disk
666
+ [2025-10-27 09:23:39,501][main][INFO] - End of epoch timers: [T_train=21:37:14 | T_epoch=03:05:50 | T_eval=00:24:19 | T_total=22:04:19]
667
+ [2025-10-27 09:23:39,505][main][INFO] - Storing model checkpoint inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/last
668
+ [2025-10-27 09:23:49,578][main][INFO] - Best FID so far, storing a copy of the model checkpoint to /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/best
669
+ [2025-10-27 09:23:58,982][main][INFO] - ---
670
+
671
+
672
+ [2025-10-27 09:23:58,983][main][INFO] - [T_total=22:04:38 | T_train=21:37:14] Start epoch 7
673
+ [2025-10-27 12:29:38,691][main][INFO] - [T_total=25:10:18 | T_train=24:42:53 | T_epoch=03:05:39] End of epoch 7 (53328 steps) train loss 21.8781
674
+ [2025-10-27 12:29:38,692][main][INFO] - [Epoch 7] All losses: [[diffusion=0.0808239 ; kl=2.15753e+07 ; lpips=0.192459 ; repa=0.50294]]
675
+ [2025-10-27 12:33:06,379][main][INFO] - [Epoch 8] Test metrics: [[MSE=25.89 | MAE=0.1143 | LPIPS=0.1838 | PSNR=15.87 | SSIM=0.3878 | dreamsim=0.273 | FID=27.51]]
676
+ [2025-10-27 12:33:06,381][main][INFO] - [Epoch 8] Best metrics: [[min_MSE=25.89 | min_MAE=0.1143 | min_LPIPS=0.1838 | max_PSNR=15.87 | max_SSIM=0.3878 | min_dreamsim=0.273 | min_FID=27.51]]
677
+ [2025-10-27 12:33:06,382][main][DEBUG] - Writing images to disk...
678
+ [2025-10-27 12:33:07,208][main][DEBUG] - Image(s) saved on disk
679
+ [2025-10-27 12:33:07,412][main][INFO] - End of epoch timers: [T_train=24:42:53 | T_epoch=03:05:39 | T_eval=00:27:47 | T_total=25:13:46]
680
+ [2025-10-27 12:33:07,414][main][INFO] - Storing model checkpoint inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/last
681
+ [2025-10-27 12:33:18,072][main][INFO] - Best FID so far, storing a copy of the model checkpoint to /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/best
682
+ [2025-10-27 12:33:28,529][main][INFO] - ---
683
+
684
+
685
+ [2025-10-27 12:33:28,530][main][INFO] - [T_total=25:14:08 | T_train=24:42:53] Start epoch 8
686
+ [2025-10-27 15:39:17,310][main][INFO] - [T_total=28:19:56 | T_train=27:48:42 | T_epoch=03:05:48] End of epoch 8 (59994 steps) train loss 56.025
687
+ [2025-10-27 15:39:17,312][main][INFO] - [Epoch 8] All losses: [[diffusion=0.0821982 ; kl=5.5718e+07 ; lpips=0.19805 ; repa=0.503114]]
688
+ [2025-10-27 15:42:44,898][main][INFO] - [Epoch 9] Test metrics: [[MSE=25.28 | MAE=0.1125 | LPIPS=0.1792 | PSNR=15.97 | SSIM=0.3941 | dreamsim=0.2633 | FID=25.14]]
689
+ [2025-10-27 15:42:44,902][main][INFO] - [Epoch 9] Best metrics: [[min_MSE=25.28 | min_MAE=0.1125 | min_LPIPS=0.1792 | max_PSNR=15.97 | max_SSIM=0.3941 | min_dreamsim=0.2633 | min_FID=25.14]]
690
+ [2025-10-27 15:42:44,903][main][DEBUG] - Writing images to disk...
691
+ [2025-10-27 15:42:45,999][main][DEBUG] - Image(s) saved on disk
692
+ [2025-10-27 15:42:46,279][main][INFO] - End of epoch timers: [T_train=27:48:42 | T_epoch=03:05:48 | T_eval=00:31:16 | T_total=28:23:25]
693
+ [2025-10-27 15:42:46,281][main][INFO] - Storing model checkpoint inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/last
694
+ [2025-10-27 15:42:57,387][main][INFO] - Best FID so far, storing a copy of the model checkpoint to /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/best
695
+ [2025-10-27 15:43:08,322][main][INFO] - ---
696
+
697
+
698
+ [2025-10-27 15:43:08,323][main][INFO] - [T_total=28:23:47 | T_train=27:48:42] Start epoch 9
699
+ [2025-10-27 18:48:50,025][main][INFO] - [T_total=31:29:29 | T_train=30:54:24 | T_epoch=03:05:41] End of epoch 9 (66660 steps) train loss 66.697
700
+ [2025-10-27 18:48:50,026][main][INFO] - [Epoch 9] All losses: [[diffusion=0.0810491 ; kl=6.63959e+07 ; lpips=0.191886 ; repa=0.496548]]
701
+ [2025-10-27 18:52:17,550][main][INFO] - [Epoch 10] Test metrics: [[MSE=24.51 | MAE=0.1103 | LPIPS=0.1742 | PSNR=16.11 | SSIM=0.4006 | dreamsim=0.2544 | FID=23.1]]
702
+ [2025-10-27 18:52:17,551][main][INFO] - [Epoch 10] Best metrics: [[min_MSE=24.51 | min_MAE=0.1103 | min_LPIPS=0.1742 | max_PSNR=16.11 | max_SSIM=0.4006 | min_dreamsim=0.2544 | min_FID=23.1]]
703
+ [2025-10-27 18:52:17,552][main][DEBUG] - Writing images to disk...
704
+ [2025-10-27 18:52:18,382][main][DEBUG] - Image(s) saved on disk
705
+ [2025-10-27 18:52:18,587][main][INFO] - End of epoch timers: [T_train=30:54:24 | T_epoch=03:05:41 | T_eval=00:34:44 | T_total=31:32:58]
706
+ [2025-10-27 18:52:18,589][main][INFO] - Storing model checkpoint inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/last
707
+ [2025-10-27 18:52:30,619][main][INFO] - Best FID so far, storing a copy of the model checkpoint to /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/best
708
+ [2025-10-27 18:52:40,962][main][INFO] - ---
709
+
710
+
711
+ [2025-10-27 18:52:40,963][main][INFO] - [T_total=31:33:20 | T_train=30:54:24] Start epoch 10
712
+ [2025-10-27 21:58:04,023][main][INFO] - [T_total=34:38:43 | T_train=33:59:47 | T_epoch=03:05:23] End of epoch 10 (73326 steps) train loss 5.94657
713
+ [2025-10-27 21:58:04,024][main][INFO] - [Epoch 10] All losses: [[diffusion=0.0795436 ; kl=5.65143e+06 ; lpips=0.186013 ; repa=0.490351]]
714
+ [2025-10-27 22:01:31,334][main][INFO] - [Epoch 11] Test metrics: [[MSE=24.04 | MAE=0.109 | LPIPS=0.1708 | PSNR=16.19 | SSIM=0.4055 | dreamsim=0.2477 | FID=21.54]]
715
+ [2025-10-27 22:01:31,336][main][INFO] - [Epoch 11] Best metrics: [[min_MSE=24.04 | min_MAE=0.109 | min_LPIPS=0.1708 | max_PSNR=16.19 | max_SSIM=0.4055 | min_dreamsim=0.2477 | min_FID=21.54]]
716
+ [2025-10-27 22:01:31,337][main][DEBUG] - Writing images to disk...
717
+ [2025-10-27 22:01:32,162][main][DEBUG] - Image(s) saved on disk
718
+ [2025-10-27 22:01:32,367][main][INFO] - End of epoch timers: [T_train=33:59:47 | T_epoch=03:05:23 | T_eval=00:38:12 | T_total=34:42:11]
719
+ [2025-10-27 22:01:32,368][main][INFO] - Storing model checkpoint inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/last
720
+ [2025-10-27 22:01:42,990][main][INFO] - Best FID so far, storing a copy of the model checkpoint to /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/best
721
+ [2025-10-27 22:01:52,731][main][INFO] - ---
722
+
723
+
724
+ [2025-10-27 22:01:52,731][main][INFO] - [T_total=34:42:32 | T_train=33:59:47] Start epoch 11
725
+ [2025-10-28 01:06:41,641][main][INFO] - [T_total=37:47:21 | T_train=37:04:36 | T_epoch=03:04:48] End of epoch 11 (79992 steps) train loss 794.458
726
+ [2025-10-28 01:06:41,643][main][INFO] - [Epoch 11] All losses: [[diffusion=0.0796192 ; kl=7.94164e+08 ; lpips=0.1859 ; repa=0.488066]]
727
+ [2025-10-28 01:10:08,942][main][INFO] - [Epoch 12] Test metrics: [[MSE=23.46 | MAE=0.1073 | LPIPS=0.1673 | PSNR=16.3 | SSIM=0.4107 | dreamsim=0.2413 | FID=20.23]]
728
+ [2025-10-28 01:10:08,944][main][INFO] - [Epoch 12] Best metrics: [[min_MSE=23.46 | min_MAE=0.1073 | min_LPIPS=0.1673 | max_PSNR=16.3 | max_SSIM=0.4107 | min_dreamsim=0.2413 | min_FID=20.23]]
729
+ [2025-10-28 01:10:08,945][main][DEBUG] - Writing images to disk...
730
+ [2025-10-28 01:10:09,790][main][DEBUG] - Image(s) saved on disk
731
+ [2025-10-28 01:10:09,998][main][INFO] - End of epoch timers: [T_train=37:04:36 | T_epoch=03:04:48 | T_eval=00:41:41 | T_total=37:50:49]
732
+ [2025-10-28 01:10:09,999][main][INFO] - Storing model checkpoint inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/last
733
+ [2025-10-28 01:10:20,640][main][INFO] - Best FID so far, storing a copy of the model checkpoint to /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/best
734
+ [2025-10-28 01:10:31,337][main][INFO] - ---
735
+
736
+
737
+ [2025-10-28 01:10:31,338][main][INFO] - [T_total=37:51:10 | T_train=37:04:36] Start epoch 12
738
+ [2025-10-28 04:15:54,812][main][INFO] - [T_total=40:56:34 | T_train=40:09:59 | T_epoch=03:05:23] End of epoch 12 (86658 steps) train loss 4.51982
739
+ [2025-10-28 04:15:54,814][main][INFO] - [Epoch 12] All losses: [[diffusion=0.0793437 ; kl=4.22649e+06 ; lpips=0.185086 ; repa=0.485754]]
740
+ [2025-10-28 04:19:22,090][main][INFO] - [Epoch 13] Test metrics: [[MSE=23.14 | MAE=0.1062 | LPIPS=0.1647 | PSNR=16.36 | SSIM=0.414 | dreamsim=0.2363 | FID=19.18]]
741
+ [2025-10-28 04:19:22,092][main][INFO] - [Epoch 13] Best metrics: [[min_MSE=23.14 | min_MAE=0.1062 | min_LPIPS=0.1647 | max_PSNR=16.36 | max_SSIM=0.414 | min_dreamsim=0.2363 | min_FID=19.18]]
742
+ [2025-10-28 04:19:22,093][main][DEBUG] - Writing images to disk...
743
+ [2025-10-28 04:19:22,937][main][DEBUG] - Image(s) saved on disk
744
+ [2025-10-28 04:19:23,137][main][INFO] - End of epoch timers: [T_train=40:09:59 | T_epoch=03:05:23 | T_eval=00:45:09 | T_total=41:00:02]
745
+ [2025-10-28 04:19:23,138][main][INFO] - Storing model checkpoint inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/last
746
+ [2025-10-28 04:19:35,064][main][INFO] - Best FID so far, storing a copy of the model checkpoint to /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/best
747
+ [2025-10-28 04:19:46,497][main][INFO] - ---
748
+
749
+
750
+ [2025-10-28 04:19:46,498][main][INFO] - [T_total=41:00:26 | T_train=40:09:59] Start epoch 13
751
+ [2025-10-28 07:25:31,821][main][INFO] - [T_total=44:06:11 | T_train=43:15:45 | T_epoch=03:05:45] End of epoch 13 (93324 steps) train loss 5.05172
752
+ [2025-10-28 07:25:31,823][main][INFO] - [Epoch 13] All losses: [[diffusion=0.0796848 ; kl=4.75808e+06 ; lpips=0.185526 ; repa=0.484762]]
753
+ [2025-10-28 07:28:58,913][main][INFO] - [Epoch 14] Test metrics: [[MSE=23.03 | MAE=0.106 | LPIPS=0.1633 | PSNR=16.38 | SSIM=0.4158 | dreamsim=0.2328 | FID=18.39]]
754
+ [2025-10-28 07:28:58,915][main][INFO] - [Epoch 14] Best metrics: [[min_MSE=23.03 | min_MAE=0.106 | min_LPIPS=0.1633 | max_PSNR=16.38 | max_SSIM=0.4158 | min_dreamsim=0.2328 | min_FID=18.39]]
755
+ [2025-10-28 07:28:58,916][main][DEBUG] - Writing images to disk...
756
+ [2025-10-28 07:28:59,747][main][DEBUG] - Image(s) saved on disk
757
+ [2025-10-28 07:28:59,993][main][INFO] - End of epoch timers: [T_train=43:15:45 | T_epoch=03:05:45 | T_eval=00:48:37 | T_total=44:09:39]
758
+ [2025-10-28 07:28:59,994][main][INFO] - Storing model checkpoint inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/last
759
+ [2025-10-28 07:29:10,392][main][INFO] - Best FID so far, storing a copy of the model checkpoint to /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/best
760
+ [2025-10-28 07:29:20,783][main][INFO] - ---
761
+
762
+
763
+ [2025-10-28 07:29:20,784][main][INFO] - [T_total=44:10:00 | T_train=43:15:45] Start epoch 14
764
+ [2025-10-28 10:35:12,763][main][INFO] - [T_total=47:15:52 | T_train=46:21:37 | T_epoch=03:05:51] End of epoch 14 (99990 steps) train loss 2.56252
765
+ [2025-10-28 10:35:12,765][main][INFO] - [Epoch 14] All losses: [[diffusion=0.0787443 ; kl=2.27203e+06 ; lpips=0.182851 ; repa=0.481282]]
766
+ [2025-10-28 10:38:39,849][main][INFO] - [Epoch 15] Test metrics: [[MSE=22.95 | MAE=0.1058 | LPIPS=0.1619 | PSNR=16.39 | SSIM=0.4198 | dreamsim=0.2294 | FID=17.64]]
767
+ [2025-10-28 10:38:39,851][main][INFO] - [Epoch 15] Best metrics: [[min_MSE=22.95 | min_MAE=0.1058 | min_LPIPS=0.1619 | max_PSNR=16.39 | max_SSIM=0.4198 | min_dreamsim=0.2294 | min_FID=17.64]]
768
+ [2025-10-28 10:38:39,853][main][DEBUG] - Writing images to disk...
769
+ [2025-10-28 10:38:40,690][main][DEBUG] - Image(s) saved on disk
770
+ [2025-10-28 10:38:40,939][main][INFO] - End of epoch timers: [T_train=46:21:37 | T_epoch=03:05:51 | T_eval=00:52:05 | T_total=47:19:20]
771
+ [2025-10-28 10:38:40,940][main][INFO] - Storing model checkpoint inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/last
772
+ [2025-10-28 10:38:52,080][main][INFO] - Best FID so far, storing a copy of the model checkpoint to /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/best
773
+ [2025-10-28 10:39:03,800][main][INFO] - ---
774
+
775
+
776
+ [2025-10-28 10:39:03,801][main][INFO] - [T_total=47:19:43 | T_train=46:21:37] Start epoch 15
777
+ [2025-10-28 13:44:56,004][main][INFO] - [T_total=50:25:35 | T_train=49:27:29 | T_epoch=03:05:52] End of epoch 15 (106656 steps) train loss 150.527
778
+ [2025-10-28 13:44:56,005][main][INFO] - [Epoch 15] All losses: [[diffusion=0.0778382 ; kl=1.50242e+08 ; lpips=0.177149 ; repa=0.475895]]
779
+ [2025-10-28 13:48:23,438][main][INFO] - [Epoch 16] Test metrics: [[MSE=22.91 | MAE=0.1058 | LPIPS=0.1609 | PSNR=16.4 | SSIM=0.4219 | dreamsim=0.2269 | FID=17.15]]
780
+ [2025-10-28 13:48:23,439][main][INFO] - [Epoch 16] Best metrics: [[min_MSE=22.91 | min_MAE=0.1058 | min_LPIPS=0.1609 | max_PSNR=16.4 | max_SSIM=0.4219 | min_dreamsim=0.2269 | min_FID=17.15]]
781
+ [2025-10-28 13:48:23,440][main][DEBUG] - Writing images to disk...
782
+ [2025-10-28 13:48:24,270][main][DEBUG] - Image(s) saved on disk
783
+ [2025-10-28 13:48:24,506][main][INFO] - End of epoch timers: [T_train=49:27:29 | T_epoch=03:05:52 | T_eval=00:55:33 | T_total=50:29:04]
784
+ [2025-10-28 13:48:24,507][main][INFO] - Storing model checkpoint inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/last
785
+ [2025-10-28 13:48:36,131][main][INFO] - Best FID so far, storing a copy of the model checkpoint to /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/best
786
+ [2025-10-28 13:48:46,875][main][INFO] - ---
787
+
788
+
789
+ [2025-10-28 13:48:46,876][main][INFO] - [T_total=50:29:26 | T_train=49:27:29] Start epoch 16
790
+ [2025-10-28 16:55:33,548][main][INFO] - [T_total=53:36:13 | T_train=52:34:16 | T_epoch=03:06:46] End of epoch 16 (113322 steps) train loss 254.554
791
+ [2025-10-28 16:55:33,549][main][INFO] - [Epoch 16] All losses: [[diffusion=0.0785965 ; kl=2.54265e+08 ; lpips=0.18152 ; repa=0.477904]]
792
+ [2025-10-28 16:59:00,697][main][INFO] - [Epoch 17] Test metrics: [[MSE=23.1 | MAE=0.1065 | LPIPS=0.16 | PSNR=16.36 | SSIM=0.425 | dreamsim=0.2245 | FID=16.63]]
793
+ [2025-10-28 16:59:00,699][main][INFO] - [Epoch 17] Best metrics: [[min_MSE=22.91 | min_MAE=0.1058 | min_LPIPS=0.16 | max_PSNR=16.4 | max_SSIM=0.425 | min_dreamsim=0.2245 | min_FID=16.63]]
794
+ [2025-10-28 16:59:00,703][main][DEBUG] - Writing images to disk...
795
+ [2025-10-28 16:59:01,784][main][DEBUG] - Image(s) saved on disk
796
+ [2025-10-28 16:59:02,034][main][INFO] - End of epoch timers: [T_train=52:34:16 | T_epoch=03:06:46 | T_eval=00:59:02 | T_total=53:39:41]
797
+ [2025-10-28 16:59:02,035][main][INFO] - Storing model checkpoint inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/last
798
+ [2025-10-28 16:59:13,349][main][INFO] - Best FID so far, storing a copy of the model checkpoint to /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/best
799
+ [2025-10-28 16:59:24,416][main][INFO] - ---
800
+
801
+
802
+ [2025-10-28 16:59:24,417][main][INFO] - [T_total=53:40:03 | T_train=52:34:16] Start epoch 17
803
+ [2025-10-28 20:04:55,554][main][INFO] - [T_total=56:45:35 | T_train=55:39:47 | T_epoch=03:05:31] End of epoch 17 (119988 steps) train loss 3.54179
804
+ [2025-10-28 20:04:55,556][main][INFO] - [Epoch 17] All losses: [[diffusion=0.0778652 ; kl=3.25619e+06 ; lpips=0.178335 ; repa=0.474268]]
805
+ [2025-10-28 20:08:22,775][main][INFO] - [Epoch 18] Test metrics: [[MSE=23.09 | MAE=0.1066 | LPIPS=0.159 | PSNR=16.37 | SSIM=0.4268 | dreamsim=0.2221 | FID=16.09]]
806
+ [2025-10-28 20:08:22,777][main][INFO] - [Epoch 18] Best metrics: [[min_MSE=22.91 | min_MAE=0.1058 | min_LPIPS=0.159 | max_PSNR=16.4 | max_SSIM=0.4268 | min_dreamsim=0.2221 | min_FID=16.09]]
807
+ [2025-10-28 20:08:22,778][main][DEBUG] - Writing images to disk...
808
+ [2025-10-28 20:08:23,864][main][DEBUG] - Image(s) saved on disk
809
+ [2025-10-28 20:08:24,074][main][INFO] - End of epoch timers: [T_train=55:39:47 | T_epoch=03:05:31 | T_eval=01:02:30 | T_total=56:49:03]
810
+ [2025-10-28 20:08:24,075][main][INFO] - Storing model checkpoint inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/last
811
+ [2025-10-28 20:08:35,565][main][INFO] - Best FID so far, storing a copy of the model checkpoint to /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/best
812
+ [2025-10-28 20:08:47,122][main][INFO] - ---
813
+
814
+
815
+ [2025-10-28 20:08:47,124][main][INFO] - [T_total=56:49:26 | T_train=55:39:47] Start epoch 18
816
+ [2025-10-28 23:13:47,257][main][INFO] - [T_total=59:54:26 | T_train=58:44:47 | T_epoch=03:05:00] End of epoch 18 (126654 steps) train loss 8.77081
817
+ [2025-10-28 23:13:47,258][main][INFO] - [Epoch 18] All losses: [[diffusion=0.0783209 ; kl=8.4835e+06 ; lpips=0.180506 ; repa=0.474953]]
818
+ [2025-10-28 23:17:14,615][main][INFO] - [Epoch 19] Test metrics: [[MSE=23.25 | MAE=0.1072 | LPIPS=0.1586 | PSNR=16.34 | SSIM=0.4269 | dreamsim=0.2204 | FID=15.71]]
819
+ [2025-10-28 23:17:14,617][main][INFO] - [Epoch 19] Best metrics: [[min_MSE=22.91 | min_MAE=0.1058 | min_LPIPS=0.1586 | max_PSNR=16.4 | max_SSIM=0.4269 | min_dreamsim=0.2204 | min_FID=15.71]]
820
+ [2025-10-28 23:17:14,618][main][DEBUG] - Writing images to disk...
821
+ [2025-10-28 23:17:15,450][main][DEBUG] - Image(s) saved on disk
822
+ [2025-10-28 23:17:15,693][main][INFO] - End of epoch timers: [T_train=58:44:47 | T_epoch=03:05:00 | T_eval=01:05:58 | T_total=59:57:55]
823
+ [2025-10-28 23:17:15,694][main][INFO] - Storing model checkpoint inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/last
824
+ [2025-10-28 23:17:25,883][main][INFO] - Best FID so far, storing a copy of the model checkpoint to /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/best
825
+ [2025-10-28 23:17:35,331][main][INFO] - ---
826
+
827
+
828
+ [2025-10-28 23:17:35,332][main][INFO] - [T_total=59:58:14 | T_train=58:44:47] Start epoch 19
829
+ [2025-10-29 02:22:52,012][main][INFO] - [T_total=63:03:31 | T_train=61:50:04 | T_epoch=03:05:16] End of epoch 19 (133320 steps) train loss 65.3624
830
+ [2025-10-29 02:22:52,014][main][INFO] - [Epoch 19] All losses: [[diffusion=0.0772616 ; kl=6.50794e+07 ; lpips=0.176332 ; repa=0.470315]]
831
+ [2025-10-29 02:26:19,233][main][INFO] - [Epoch 20] Test metrics: [[MSE=23.28 | MAE=0.1074 | LPIPS=0.1579 | PSNR=16.33 | SSIM=0.4276 | dreamsim=0.2187 | FID=15.33]]
832
+ [2025-10-29 02:26:19,250][main][INFO] - [Epoch 20] Best metrics: [[min_MSE=22.91 | min_MAE=0.1058 | min_LPIPS=0.1579 | max_PSNR=16.4 | max_SSIM=0.4276 | min_dreamsim=0.2187 | min_FID=15.33]]
833
+ [2025-10-29 02:26:19,251][main][DEBUG] - Writing images to disk...
834
+ [2025-10-29 02:26:20,086][main][DEBUG] - Image(s) saved on disk
835
+ [2025-10-29 02:26:20,336][main][INFO] - End of epoch timers: [T_train=61:50:04 | T_epoch=03:05:16 | T_eval=01:09:26 | T_total=63:06:59]
836
+ [2025-10-29 02:26:20,337][main][INFO] - Storing model checkpoint inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/last
837
+ [2025-10-29 02:26:31,555][main][INFO] - Best FID so far, storing a copy of the model checkpoint to /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/best
838
+ [2025-10-29 02:26:42,308][main][INFO] - ---
839
+
840
+
841
+ [2025-10-29 02:26:42,309][main][INFO] - [T_total=63:07:21 | T_train=61:50:04] Start epoch 20
842
+ [2025-10-29 05:32:23,347][main][INFO] - [T_total=66:13:02 | T_train=64:55:45 | T_epoch=03:05:41] End of epoch 20 (139986 steps) train loss 0.949199
843
+ [2025-10-29 05:32:23,349][main][INFO] - [Epoch 20] All losses: [[diffusion=0.07719 ; kl=667540 ; lpips=0.174758 ; repa=0.468358]]
844
+ [2025-10-29 05:35:50,507][main][INFO] - [Epoch 21] Test metrics: [[MSE=23.35 | MAE=0.1078 | LPIPS=0.1576 | PSNR=16.32 | SSIM=0.4284 | dreamsim=0.2173 | FID=15.02]]
845
+ [2025-10-29 05:35:50,509][main][INFO] - [Epoch 21] Best metrics: [[min_MSE=22.91 | min_MAE=0.1058 | min_LPIPS=0.1576 | max_PSNR=16.4 | max_SSIM=0.4284 | min_dreamsim=0.2173 | min_FID=15.02]]
846
+ [2025-10-29 05:35:50,510][main][DEBUG] - Writing images to disk...
847
+ [2025-10-29 05:35:51,347][main][DEBUG] - Image(s) saved on disk
848
+ [2025-10-29 05:35:51,557][main][INFO] - End of epoch timers: [T_train=64:55:45 | T_epoch=03:05:41 | T_eval=01:12:55 | T_total=66:16:31]
849
+ [2025-10-29 05:35:51,558][main][INFO] - Storing model checkpoint inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/last
850
+ [2025-10-29 05:36:02,284][main][INFO] - Best FID so far, storing a copy of the model checkpoint to /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/best
851
+ [2025-10-29 05:36:12,666][main][INFO] - ---
852
+
853
+
854
+ [2025-10-29 05:36:12,667][main][INFO] - [T_total=66:16:52 | T_train=64:55:45] Start epoch 21
855
+ [2025-10-29 08:42:54,495][main][INFO] - [T_total=69:23:34 | T_train=68:02:26 | T_epoch=03:06:41] End of epoch 21 (146652 steps) train loss 289.216
856
+ [2025-10-29 08:42:54,496][main][INFO] - [Epoch 21] All losses: [[diffusion=0.0776409 ; kl=2.88933e+08 ; lpips=0.176763 ; repa=0.469242]]
857
+ [2025-10-29 08:46:22,241][main][INFO] - [Epoch 22] Test metrics: [[MSE=23.57 | MAE=0.1086 | LPIPS=0.1573 | PSNR=16.28 | SSIM=0.4288 | dreamsim=0.2159 | FID=14.7]]
858
+ [2025-10-29 08:46:22,243][main][INFO] - [Epoch 22] Best metrics: [[min_MSE=22.91 | min_MAE=0.1058 | min_LPIPS=0.1573 | max_PSNR=16.4 | max_SSIM=0.4288 | min_dreamsim=0.2159 | min_FID=14.7]]
859
+ [2025-10-29 08:46:22,244][main][DEBUG] - Writing images to disk...
860
+ [2025-10-29 08:46:23,102][main][DEBUG] - Image(s) saved on disk
861
+ [2025-10-29 08:46:23,340][main][INFO] - End of epoch timers: [T_train=68:02:26 | T_epoch=03:06:41 | T_eval=01:16:23 | T_total=69:27:02]
862
+ [2025-10-29 08:46:23,341][main][INFO] - Storing model checkpoint inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/last
863
+ [2025-10-29 08:46:34,825][main][INFO] - Best FID so far, storing a copy of the model checkpoint to /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/best
864
+ [2025-10-29 08:46:45,943][main][INFO] - ---
865
+
866
+
867
+ [2025-10-29 08:46:45,944][main][INFO] - [T_total=69:27:25 | T_train=68:02:26] Start epoch 22
868
+ [2025-10-29 11:52:25,547][main][INFO] - [T_total=72:33:05 | T_train=71:08:06 | T_epoch=03:05:39] End of epoch 22 (153318 steps) train loss 6.70619e+06
869
+ [2025-10-29 11:52:25,549][main][INFO] - [Epoch 22] All losses: [[diffusion=0.0782495 ; kl=6.70619e+12 ; lpips=0.180401 ; repa=0.471485]]
870
+ [2025-10-29 11:55:52,869][main][INFO] - [Epoch 23] Test metrics: [[MSE=23.7 | MAE=0.109 | LPIPS=0.157 | PSNR=16.25 | SSIM=0.4303 | dreamsim=0.2147 | FID=14.49]]
871
+ [2025-10-29 11:55:52,871][main][INFO] - [Epoch 23] Best metrics: [[min_MSE=22.91 | min_MAE=0.1058 | min_LPIPS=0.157 | max_PSNR=16.4 | max_SSIM=0.4303 | min_dreamsim=0.2147 | min_FID=14.49]]
872
+ [2025-10-29 11:55:52,872][main][DEBUG] - Writing images to disk...
873
+ [2025-10-29 11:55:53,703][main][DEBUG] - Image(s) saved on disk
874
+ [2025-10-29 11:55:53,941][main][INFO] - End of epoch timers: [T_train=71:08:06 | T_epoch=03:05:39 | T_eval=01:19:51 | T_total=72:36:33]
875
+ [2025-10-29 11:55:53,943][main][INFO] - Storing model checkpoint inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/last
876
+ [2025-10-29 11:56:05,150][main][INFO] - Best FID so far, storing a copy of the model checkpoint to /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/best
877
+ [2025-10-29 11:56:16,440][main][INFO] - ---
878
+
879
+
880
+ [2025-10-29 11:56:16,441][main][INFO] - [T_total=72:36:55 | T_train=71:08:06] Start epoch 23
881
+ [2025-10-29 15:01:26,171][main][INFO] - [T_total=75:42:05 | T_train=74:13:16 | T_epoch=03:05:09] End of epoch 23 (159984 steps) train loss 168.462
882
+ [2025-10-29 15:01:26,172][main][INFO] - [Epoch 23] All losses: [[diffusion=0.0781783 ; kl=1.68175e+08 ; lpips=0.18157 ; repa=0.471573]]
883
+ [2025-10-29 15:04:53,505][main][INFO] - [Epoch 24] Test metrics: [[MSE=23.89 | MAE=0.1096 | LPIPS=0.1566 | PSNR=16.22 | SSIM=0.4302 | dreamsim=0.2137 | FID=14.27]]
884
+ [2025-10-29 15:04:53,507][main][INFO] - [Epoch 24] Best metrics: [[min_MSE=22.91 | min_MAE=0.1058 | min_LPIPS=0.1566 | max_PSNR=16.4 | max_SSIM=0.4303 | min_dreamsim=0.2137 | min_FID=14.27]]
885
+ [2025-10-29 15:04:53,508][main][DEBUG] - Writing images to disk...
886
+ [2025-10-29 15:04:54,339][main][DEBUG] - Image(s) saved on disk
887
+ [2025-10-29 15:04:54,543][main][INFO] - End of epoch timers: [T_train=74:13:16 | T_epoch=03:05:09 | T_eval=01:23:20 | T_total=75:45:34]
888
+ [2025-10-29 15:04:54,544][main][INFO] - Storing model checkpoint inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/last
889
+ [2025-10-29 15:05:05,591][main][INFO] - Best FID so far, storing a copy of the model checkpoint to /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/best
890
+ [2025-10-29 15:05:16,848][main][INFO] - ---
891
+
892
+
893
+ [2025-10-29 15:05:16,849][main][INFO] - [T_total=75:45:56 | T_train=74:13:16] Start epoch 24
894
+ [2025-10-29 18:10:51,689][main][INFO] - [T_total=78:51:31 | T_train=77:18:51 | T_epoch=03:05:34] End of epoch 24 (166650 steps) train loss 1.72129
895
+ [2025-10-29 18:10:51,690][main][INFO] - [Epoch 24] All losses: [[diffusion=0.0766599 ; kl=1.44158e+06 ; lpips=0.173953 ; repa=0.464302]]
896
+ [2025-10-29 18:14:18,759][main][INFO] - [Epoch 25] Test metrics: [[MSE=24.04 | MAE=0.1101 | LPIPS=0.1564 | PSNR=16.19 | SSIM=0.4306 | dreamsim=0.2126 | FID=13.99]]
897
+ [2025-10-29 18:14:18,761][main][INFO] - [Epoch 25] Best metrics: [[min_MSE=22.91 | min_MAE=0.1058 | min_LPIPS=0.1564 | max_PSNR=16.4 | max_SSIM=0.4306 | min_dreamsim=0.2126 | min_FID=13.99]]
898
+ [2025-10-29 18:14:18,762][main][DEBUG] - Writing images to disk...
899
+ [2025-10-29 18:14:19,594][main][DEBUG] - Image(s) saved on disk
900
+ [2025-10-29 18:14:19,796][main][INFO] - End of epoch timers: [T_train=77:18:51 | T_epoch=03:05:34 | T_eval=01:26:48 | T_total=78:54:59]
901
+ [2025-10-29 18:14:19,797][main][INFO] - Storing model checkpoint inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/last
902
+ [2025-10-29 18:14:31,104][main][INFO] - Best FID so far, storing a copy of the model checkpoint to /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/best
903
+ [2025-10-29 18:14:42,095][main][INFO] - ---
904
+
905
+
906
+ [2025-10-29 18:14:42,095][main][INFO] - [T_total=78:55:21 | T_train=77:18:51] Start epoch 25
907
+ [2025-10-29 21:19:56,417][main][INFO] - [T_total=82:00:35 | T_train=80:24:05 | T_epoch=03:05:14] End of epoch 25 (173316 steps) train loss 2.83654e+07
908
+ [2025-10-29 21:19:56,418][main][INFO] - [Epoch 25] All losses: [[diffusion=0.0773898 ; kl=2.83654e+13 ; lpips=0.17699 ; repa=0.466391]]
909
+ [2025-10-29 21:23:23,926][main][INFO] - [Epoch 26] Test metrics: [[MSE=24.22 | MAE=0.1107 | LPIPS=0.1564 | PSNR=16.16 | SSIM=0.4312 | dreamsim=0.2119 | FID=13.81]]
910
+ [2025-10-29 21:23:23,928][main][INFO] - [Epoch 26] Best metrics: [[min_MSE=22.91 | min_MAE=0.1058 | min_LPIPS=0.1564 | max_PSNR=16.4 | max_SSIM=0.4312 | min_dreamsim=0.2119 | min_FID=13.81]]
911
+ [2025-10-29 21:23:23,929][main][DEBUG] - Writing images to disk...
912
+ [2025-10-29 21:23:25,021][main][DEBUG] - Image(s) saved on disk
913
+ [2025-10-29 21:23:25,253][main][INFO] - End of epoch timers: [T_train=80:24:05 | T_epoch=03:05:14 | T_eval=01:30:16 | T_total=82:04:04]
914
+ [2025-10-29 21:23:25,254][main][INFO] - Storing model checkpoint inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/last
915
+ [2025-10-29 21:23:36,113][main][INFO] - Best FID so far, storing a copy of the model checkpoint to /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/best
916
+ [2025-10-29 21:23:47,503][main][INFO] - ---
917
+
918
+
919
+ [2025-10-29 21:23:47,504][main][INFO] - [T_total=82:04:27 | T_train=80:24:05] Start epoch 26
920
+ [2025-10-30 00:29:03,175][main][INFO] - [T_total=85:09:42 | T_train=83:29:21 | T_epoch=03:05:15] End of epoch 26 (179982 steps) train loss 2.92682
921
+ [2025-10-30 00:29:03,176][main][INFO] - [Epoch 26] All losses: [[diffusion=0.076385 ; kl=2.6494e+06 ; lpips=0.171497 ; repa=0.46114]]
922
+ [2025-10-30 00:32:30,822][main][INFO] - [Epoch 27] Test metrics: [[MSE=24.25 | MAE=0.1109 | LPIPS=0.1556 | PSNR=16.15 | SSIM=0.4313 | dreamsim=0.2106 | FID=13.64]]
923
+ [2025-10-30 00:32:30,823][main][INFO] - [Epoch 27] Best metrics: [[min_MSE=22.91 | min_MAE=0.1058 | min_LPIPS=0.1556 | max_PSNR=16.4 | max_SSIM=0.4313 | min_dreamsim=0.2106 | min_FID=13.64]]
924
+ [2025-10-30 00:32:30,825][main][DEBUG] - Writing images to disk...
925
+ [2025-10-30 00:32:31,903][main][DEBUG] - Image(s) saved on disk
926
+ [2025-10-30 00:32:32,151][main][INFO] - End of epoch timers: [T_train=83:29:21 | T_epoch=03:05:15 | T_eval=01:33:45 | T_total=85:13:11]
927
+ [2025-10-30 00:32:32,152][main][INFO] - Storing model checkpoint inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/last
928
+ [2025-10-30 00:32:42,864][main][INFO] - Best FID so far, storing a copy of the model checkpoint to /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/best
929
+ [2025-10-30 00:32:54,008][main][INFO] - ---
930
+
931
+
932
+ [2025-10-30 00:32:54,009][main][INFO] - [T_total=85:13:33 | T_train=83:29:21] Start epoch 27
933
+ [2025-10-30 03:37:54,148][main][INFO] - [T_total=88:18:33 | T_train=86:34:21 | T_epoch=03:05:00] End of epoch 27 (186648 steps) train loss 0.940592
934
+ [2025-10-30 03:37:54,149][main][INFO] - [Epoch 27] All losses: [[diffusion=0.0759206 ; kl=665087 ; lpips=0.169737 ; repa=0.458863]]
935
+ [2025-10-30 03:41:21,260][main][INFO] - [Epoch 28] Test metrics: [[MSE=24.31 | MAE=0.1112 | LPIPS=0.1553 | PSNR=16.14 | SSIM=0.4322 | dreamsim=0.2096 | FID=13.42]]
936
+ [2025-10-30 03:41:21,262][main][INFO] - [Epoch 28] Best metrics: [[min_MSE=22.91 | min_MAE=0.1058 | min_LPIPS=0.1553 | max_PSNR=16.4 | max_SSIM=0.4322 | min_dreamsim=0.2096 | min_FID=13.42]]
937
+ [2025-10-30 03:41:21,263][main][DEBUG] - Writing images to disk...
938
+ [2025-10-30 03:41:22,104][main][DEBUG] - Image(s) saved on disk
939
+ [2025-10-30 03:41:22,371][main][INFO] - End of epoch timers: [T_train=86:34:21 | T_epoch=03:05:00 | T_eval=01:37:13 | T_total=88:22:01]
940
+ [2025-10-30 03:41:22,372][main][INFO] - Storing model checkpoint inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/last
941
+ [2025-10-30 03:41:34,752][main][INFO] - Best FID so far, storing a copy of the model checkpoint to /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/best
942
+ [2025-10-30 03:41:46,656][main][INFO] - ---
943
+
944
+
945
+ [2025-10-30 03:41:46,657][main][INFO] - [T_total=88:22:26 | T_train=86:34:21] Start epoch 28
946
+ [2025-10-30 06:47:29,123][main][INFO] - [T_total=91:28:08 | T_train=89:40:03 | T_epoch=03:05:42] End of epoch 28 (193314 steps) train loss 37.7653
947
+ [2025-10-30 06:47:29,124][main][INFO] - [Epoch 28] All losses: [[diffusion=0.0768294 ; kl=3.74862e+07 ; lpips=0.173765 ; repa=0.461866]]
948
+ [2025-10-30 06:50:55,982][main][INFO] - [Epoch 29] Test metrics: [[MSE=24.39 | MAE=0.1115 | LPIPS=0.1553 | PSNR=16.13 | SSIM=0.4326 | dreamsim=0.209 | FID=13.31]]
949
+ [2025-10-30 06:50:55,983][main][INFO] - [Epoch 29] Best metrics: [[min_MSE=22.91 | min_MAE=0.1058 | min_LPIPS=0.1553 | max_PSNR=16.4 | max_SSIM=0.4326 | min_dreamsim=0.209 | min_FID=13.31]]
950
+ [2025-10-30 06:50:55,984][main][DEBUG] - Writing images to disk...
951
+ [2025-10-30 06:50:56,817][main][DEBUG] - Image(s) saved on disk
952
+ [2025-10-30 06:50:57,019][main][INFO] - End of epoch timers: [T_train=89:40:03 | T_epoch=03:05:42 | T_eval=01:40:41 | T_total=91:31:36]
953
+ [2025-10-30 06:50:57,021][main][INFO] - Storing model checkpoint inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/last
954
+ [2025-10-30 06:51:07,339][main][INFO] - Best FID so far, storing a copy of the model checkpoint to /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/best
955
+ [2025-10-30 06:51:16,820][main][INFO] - ---
956
+
957
+
958
+ [2025-10-30 06:51:16,821][main][INFO] - [T_total=91:31:56 | T_train=89:40:03] Start epoch 29
959
+ [2025-10-30 09:56:41,969][main][INFO] - [T_total=94:37:21 | T_train=92:45:28 | T_epoch=03:05:25] End of epoch 29 (199980 steps) train loss 422
960
+ [2025-10-30 09:56:41,970][main][INFO] - [Epoch 29] All losses: [[diffusion=0.0763346 ; kl=4.21723e+08 ; lpips=0.171742 ; repa=0.459557]]
961
+ [2025-10-30 10:00:09,392][main][INFO] - [Epoch 30] Test metrics: [[MSE=24.44 | MAE=0.1117 | LPIPS=0.1547 | PSNR=16.12 | SSIM=0.4334 | dreamsim=0.2078 | FID=13.08]]
962
+ [2025-10-30 10:00:09,394][main][INFO] - [Epoch 30] Best metrics: [[min_MSE=22.91 | min_MAE=0.1058 | min_LPIPS=0.1547 | max_PSNR=16.4 | max_SSIM=0.4334 | min_dreamsim=0.2078 | min_FID=13.08]]
963
+ [2025-10-30 10:00:09,396][main][DEBUG] - Writing images to disk...
964
+ [2025-10-30 10:00:10,246][main][DEBUG] - Image(s) saved on disk
965
+ [2025-10-30 10:00:10,452][main][INFO] - End of epoch timers: [T_train=92:45:28 | T_epoch=03:05:25 | T_eval=01:44:09 | T_total=94:40:49]
966
+ [2025-10-30 10:00:10,453][main][INFO] - Storing model checkpoint inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/last
967
+ [2025-10-30 10:00:21,119][main][INFO] - Best FID so far, storing a copy of the model checkpoint to /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/best
968
+ [2025-10-30 10:00:31,248][main][INFO] - ---
969
+
970
+
971
+ [2025-10-30 10:00:31,249][main][INFO] - [T_total=94:41:10 | T_train=92:45:28] Start epoch 30
972
+ [2025-10-30 13:06:42,974][main][INFO] - [T_total=97:47:22 | T_train=95:51:40 | T_epoch=03:06:11] End of epoch 30 (206646 steps) train loss 255193
973
+ [2025-10-30 13:06:42,975][main][INFO] - [Epoch 30] All losses: [[diffusion=0.0769152 ; kl=2.55193e+11 ; lpips=0.174071 ; repa=0.460975]]
974
+ [2025-10-30 13:10:10,113][main][INFO] - [Epoch 31] Test metrics: [[MSE=24.51 | MAE=0.112 | LPIPS=0.1546 | PSNR=16.11 | SSIM=0.4338 | dreamsim=0.2072 | FID=12.95]]
975
+ [2025-10-30 13:10:10,116][main][INFO] - [Epoch 31] Best metrics: [[min_MSE=22.91 | min_MAE=0.1058 | min_LPIPS=0.1546 | max_PSNR=16.4 | max_SSIM=0.4338 | min_dreamsim=0.2072 | min_FID=12.95]]
976
+ [2025-10-30 13:10:10,117][main][DEBUG] - Writing images to disk...
977
+ [2025-10-30 13:10:10,957][main][DEBUG] - Image(s) saved on disk
978
+ [2025-10-30 13:10:11,195][main][INFO] - End of epoch timers: [T_train=95:51:40 | T_epoch=03:06:11 | T_eval=01:47:37 | T_total=97:50:50]
979
+ [2025-10-30 13:10:11,197][main][INFO] - Storing model checkpoint inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/last
980
+ [2025-10-30 13:10:21,570][main][INFO] - Best FID so far, storing a copy of the model checkpoint to /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/best
981
+ [2025-10-30 13:10:31,290][main][INFO] - ---
982
+
983
+
984
+ [2025-10-30 13:10:31,291][main][INFO] - [T_total=97:51:10 | T_train=95:51:40] Start epoch 31
985
+ [2025-10-30 16:15:53,375][main][INFO] - [T_total=100:56:32 | T_train=98:57:02 | T_epoch=03:05:22] End of epoch 31 (213312 steps) train loss 29024
986
+ [2025-10-30 16:15:53,376][main][INFO] - [Epoch 31] All losses: [[diffusion=0.0770869 ; kl=2.90237e+10 ; lpips=0.1765 ; repa=0.462709]]
987
+ [2025-10-30 16:19:20,850][main][INFO] - [Epoch 32] Test metrics: [[MSE=24.52 | MAE=0.1121 | LPIPS=0.1545 | PSNR=16.1 | SSIM=0.4351 | dreamsim=0.2065 | FID=12.85]]
988
+ [2025-10-30 16:19:20,852][main][INFO] - [Epoch 32] Best metrics: [[min_MSE=22.91 | min_MAE=0.1058 | min_LPIPS=0.1545 | max_PSNR=16.4 | max_SSIM=0.4351 | min_dreamsim=0.2065 | min_FID=12.85]]
989
+ [2025-10-30 16:19:20,854][main][DEBUG] - Writing images to disk...
990
+ [2025-10-30 16:19:21,697][main][DEBUG] - Image(s) saved on disk
991
+ [2025-10-30 16:19:21,942][main][INFO] - End of epoch timers: [T_train=98:57:02 | T_epoch=03:05:22 | T_eval=01:51:06 | T_total=101:00:01]
992
+ [2025-10-30 16:19:21,942][main][INFO] - Storing model checkpoint inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/last
993
+ [2025-10-30 16:19:33,022][main][INFO] - Best FID so far, storing a copy of the model checkpoint to /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/best
994
+ [2025-10-30 16:19:43,573][main][INFO] - ---
995
+
996
+
997
+ [2025-10-30 16:19:43,574][main][INFO] - [T_total=101:00:23 | T_train=98:57:02] Start epoch 32
998
+ [2025-10-30 19:24:58,751][main][INFO] - [T_total=104:05:38 | T_train=102:02:17 | T_epoch=03:05:15] End of epoch 32 (219978 steps) train loss 20.2597
999
+ [2025-10-30 19:24:58,752][main][INFO] - [Epoch 32] All losses: [[diffusion=0.0767827 ; kl=1.99806e+07 ; lpips=0.174456 ; repa=0.460265]]
1000
+ [2025-10-30 19:28:26,469][main][INFO] - [Epoch 33] Test metrics: [[MSE=24.6 | MAE=0.1125 | LPIPS=0.1544 | PSNR=16.09 | SSIM=0.4346 | dreamsim=0.206 | FID=12.76]]
1001
+ [2025-10-30 19:28:26,471][main][INFO] - [Epoch 33] Best metrics: [[min_MSE=22.91 | min_MAE=0.1058 | min_LPIPS=0.1544 | max_PSNR=16.4 | max_SSIM=0.4351 | min_dreamsim=0.206 | min_FID=12.76]]
1002
+ [2025-10-30 19:28:26,473][main][DEBUG] - Writing images to disk...
1003
+ [2025-10-30 19:28:27,304][main][DEBUG] - Image(s) saved on disk
1004
+ [2025-10-30 19:28:27,556][main][INFO] - End of epoch timers: [T_train=102:02:17 | T_epoch=03:05:15 | T_eval=01:54:34 | T_total=104:09:07]
1005
+ [2025-10-30 19:28:27,558][main][INFO] - Storing model checkpoint inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/last
1006
+ [2025-10-30 19:28:38,944][main][INFO] - Best FID so far, storing a copy of the model checkpoint to /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/best
1007
+ [2025-10-30 19:28:50,181][main][INFO] - ---
1008
+
1009
+
1010
+ [2025-10-30 19:28:50,182][main][INFO] - [T_total=104:09:29 | T_train=102:02:17] Start epoch 33
1011
+ [2025-10-30 22:34:36,461][main][INFO] - [T_total=107:15:16 | T_train=105:08:04 | T_epoch=03:05:46] End of epoch 33 (226644 steps) train loss 176.126
1012
+ [2025-10-30 22:34:36,462][main][INFO] - [Epoch 33] All losses: [[diffusion=0.0773723 ; kl=1.75845e+08 ; lpips=0.176956 ; repa=0.462503]]
1013
+ [2025-10-30 22:38:03,690][main][INFO] - [Epoch 34] Test metrics: [[MSE=24.58 | MAE=0.1124 | LPIPS=0.1542 | PSNR=16.09 | SSIM=0.4358 | dreamsim=0.2053 | FID=12.61]]
1014
+ [2025-10-30 22:38:03,692][main][INFO] - [Epoch 34] Best metrics: [[min_MSE=22.91 | min_MAE=0.1058 | min_LPIPS=0.1542 | max_PSNR=16.4 | max_SSIM=0.4358 | min_dreamsim=0.2053 | min_FID=12.61]]
1015
+ [2025-10-30 22:38:03,692][main][DEBUG] - Writing images to disk...
1016
+ [2025-10-30 22:38:04,540][main][DEBUG] - Image(s) saved on disk
1017
+ [2025-10-30 22:38:04,752][main][INFO] - End of epoch timers: [T_train=105:08:04 | T_epoch=03:05:46 | T_eval=01:58:02 | T_total=107:18:44]
1018
+ [2025-10-30 22:38:04,753][main][INFO] - Storing model checkpoint inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/last
1019
+ [2025-10-30 22:38:16,516][main][INFO] - Best FID so far, storing a copy of the model checkpoint to /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/best
1020
+ [2025-10-30 22:38:27,966][main][INFO] - ---
1021
+
1022
+
1023
+ [2025-10-30 22:38:27,967][main][INFO] - [T_total=107:19:07 | T_train=105:08:04] Start epoch 34
1024
+ [2025-10-31 01:43:52,689][main][INFO] - [T_total=110:24:32 | T_train=108:13:28 | T_epoch=03:05:24] End of epoch 34 (233310 steps) train loss 3098.35
1025
+ [2025-10-31 01:43:52,690][main][INFO] - [Epoch 34] All losses: [[diffusion=0.0765349 ; kl=3.09807e+09 ; lpips=0.174148 ; repa=0.459381]]
1026
+ [2025-10-31 01:47:20,181][main][INFO] - [Epoch 35] Test metrics: [[MSE=24.56 | MAE=0.1125 | LPIPS=0.1538 | PSNR=16.1 | SSIM=0.4369 | dreamsim=0.2045 | FID=12.45]]
1027
+ [2025-10-31 01:47:20,183][main][INFO] - [Epoch 35] Best metrics: [[min_MSE=22.91 | min_MAE=0.1058 | min_LPIPS=0.1538 | max_PSNR=16.4 | max_SSIM=0.4369 | min_dreamsim=0.2045 | min_FID=12.45]]
1028
+ [2025-10-31 01:47:20,184][main][DEBUG] - Writing images to disk...
1029
+ [2025-10-31 01:47:21,284][main][DEBUG] - Image(s) saved on disk
1030
+ [2025-10-31 01:47:21,496][main][INFO] - End of epoch timers: [T_train=108:13:28 | T_epoch=03:05:24 | T_eval=02:01:31 | T_total=110:28:01]
1031
+ [2025-10-31 01:47:21,497][main][INFO] - Storing model checkpoint inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/last
1032
+ [2025-10-31 01:47:32,897][main][INFO] - Best FID so far, storing a copy of the model checkpoint to /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/best
1033
+ [2025-10-31 01:47:43,716][main][INFO] - ---
1034
+
1035
+
1036
+ [2025-10-31 01:47:43,717][main][INFO] - [T_total=110:28:23 | T_train=108:13:28] Start epoch 35
1037
+ [2025-10-31 04:53:48,886][main][INFO] - [T_total=113:34:28 | T_train=111:19:33 | T_epoch=03:06:05] End of epoch 35 (239976 steps) train loss 44306.2
1038
+ [2025-10-31 04:53:48,887][main][INFO] - [Epoch 35] All losses: [[diffusion=0.0759655 ; kl=4.43059e+10 ; lpips=0.170242 ; repa=0.455204]]
1039
+ [2025-10-31 04:57:16,489][main][INFO] - [Epoch 36] Test metrics: [[MSE=24.6 | MAE=0.1126 | LPIPS=0.1535 | PSNR=16.09 | SSIM=0.4367 | dreamsim=0.2038 | FID=12.31]]
1040
+ [2025-10-31 04:57:16,491][main][INFO] - [Epoch 36] Best metrics: [[min_MSE=22.91 | min_MAE=0.1058 | min_LPIPS=0.1535 | max_PSNR=16.4 | max_SSIM=0.4369 | min_dreamsim=0.2038 | min_FID=12.31]]
1041
+ [2025-10-31 04:57:16,493][main][DEBUG] - Writing images to disk...
1042
+ [2025-10-31 04:57:17,591][main][DEBUG] - Image(s) saved on disk
1043
+ [2025-10-31 04:57:17,835][main][INFO] - End of epoch timers: [T_train=111:19:33 | T_epoch=03:06:05 | T_eval=02:05:00 | T_total=113:37:57]
1044
+ [2025-10-31 04:57:17,836][main][INFO] - Storing model checkpoint inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/last
1045
+ [2025-10-31 04:57:28,300][main][INFO] - Best FID so far, storing a copy of the model checkpoint to /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/best
1046
+ [2025-10-31 04:57:38,419][main][INFO] - ---
1047
+
1048
+
1049
+ [2025-10-31 04:57:38,420][main][INFO] - [T_total=113:38:17 | T_train=111:19:33] Start epoch 36
1050
+ [2025-10-31 08:03:40,896][main][INFO] - [T_total=116:44:20 | T_train=114:25:36 | T_epoch=03:06:02] End of epoch 36 (246642 steps) train loss 13991
1051
+ [2025-10-31 08:03:40,897][main][INFO] - [Epoch 36] All losses: [[diffusion=0.0760433 ; kl=1.39907e+10 ; lpips=0.169438 ; repa=0.454577]]
1052
+ [2025-10-31 08:07:07,981][main][INFO] - [Epoch 37] Test metrics: [[MSE=24.59 | MAE=0.1126 | LPIPS=0.1533 | PSNR=16.09 | SSIM=0.4386 | dreamsim=0.2031 | FID=12.18]]
1053
+ [2025-10-31 08:07:07,982][main][INFO] - [Epoch 37] Best metrics: [[min_MSE=22.91 | min_MAE=0.1058 | min_LPIPS=0.1533 | max_PSNR=16.4 | max_SSIM=0.4386 | min_dreamsim=0.2031 | min_FID=12.18]]
1054
+ [2025-10-31 08:07:07,983][main][DEBUG] - Writing images to disk...
1055
+ [2025-10-31 08:07:08,817][main][DEBUG] - Image(s) saved on disk
1056
+ [2025-10-31 08:07:09,048][main][INFO] - End of epoch timers: [T_train=114:25:36 | T_epoch=03:06:02 | T_eval=02:08:28 | T_total=116:47:48]
1057
+ [2025-10-31 08:07:09,049][main][INFO] - Storing model checkpoint inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/last
1058
+ [2025-10-31 08:07:19,835][main][INFO] - Best FID so far, storing a copy of the model checkpoint to /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/best
1059
+ [2025-10-31 08:07:30,639][main][INFO] - ---
1060
+
1061
+
1062
+ [2025-10-31 08:07:30,640][main][INFO] - [T_total=116:48:10 | T_train=114:25:36] Start epoch 37
1063
+ [2025-10-31 11:12:53,371][main][INFO] - [T_total=119:53:32 | T_train=117:30:59 | T_epoch=03:05:22] End of epoch 37 (253308 steps) train loss 13991.7
1064
+ [2025-10-31 11:12:53,372][main][INFO] - [Epoch 37] All losses: [[diffusion=0.0767062 ; kl=1.39915e+10 ; lpips=0.174108 ; repa=0.4581]]
1065
+ [2025-10-31 11:16:19,970][main][INFO] - [Epoch 38] Test metrics: [[MSE=24.53 | MAE=0.1125 | LPIPS=0.1528 | PSNR=16.1 | SSIM=0.4386 | dreamsim=0.2024 | FID=12.07]]
1066
+ [2025-10-31 11:16:19,972][main][INFO] - [Epoch 38] Best metrics: [[min_MSE=22.91 | min_MAE=0.1058 | min_LPIPS=0.1528 | max_PSNR=16.4 | max_SSIM=0.4386 | min_dreamsim=0.2024 | min_FID=12.07]]
1067
+ [2025-10-31 11:16:19,973][main][DEBUG] - Writing images to disk...
1068
+ [2025-10-31 11:16:20,809][main][DEBUG] - Image(s) saved on disk
1069
+ [2025-10-31 11:16:21,014][main][INFO] - End of epoch timers: [T_train=117:30:59 | T_epoch=03:05:22 | T_eval=02:11:55 | T_total=119:57:00]
1070
+ [2025-10-31 11:16:21,015][main][INFO] - Storing model checkpoint inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/last
1071
+ [2025-10-31 11:16:32,831][main][INFO] - Best FID so far, storing a copy of the model checkpoint to /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/best
1072
+ [2025-10-31 11:16:44,606][main][INFO] - ---
1073
+
1074
+
1075
+ [2025-10-31 11:16:44,607][main][INFO] - [T_total=119:57:24 | T_train=117:30:59] Start epoch 38
1076
+ [2025-10-31 14:21:53,789][main][INFO] - [T_total=123:02:33 | T_train=120:36:08 | T_epoch=03:05:09] End of epoch 38 (259974 steps) train loss 957.097
1077
+ [2025-10-31 14:21:53,790][main][INFO] - [Epoch 38] All losses: [[diffusion=0.0761426 ; kl=9.56822e+08 ; lpips=0.170405 ; repa=0.454693]]
1078
+ [2025-10-31 14:25:20,866][main][INFO] - [Epoch 39] Test metrics: [[MSE=24.69 | MAE=0.1129 | LPIPS=0.1528 | PSNR=16.07 | SSIM=0.4388 | dreamsim=0.2019 | FID=11.95]]
1079
+ [2025-10-31 14:25:20,868][main][INFO] - [Epoch 39] Best metrics: [[min_MSE=22.91 | min_MAE=0.1058 | min_LPIPS=0.1528 | max_PSNR=16.4 | max_SSIM=0.4388 | min_dreamsim=0.2019 | min_FID=11.95]]
1080
+ [2025-10-31 14:25:20,869][main][DEBUG] - Writing images to disk...
1081
+ [2025-10-31 14:25:21,702][main][DEBUG] - Image(s) saved on disk
1082
+ [2025-10-31 14:25:21,953][main][INFO] - End of epoch timers: [T_train=120:36:08 | T_epoch=03:05:09 | T_eval=02:15:23 | T_total=123:06:01]
1083
+ [2025-10-31 14:25:21,955][main][INFO] - Storing model checkpoint inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/last
1084
+ [2025-10-31 14:25:32,079][main][INFO] - Best FID so far, storing a copy of the model checkpoint to /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/best
1085
+ [2025-10-31 14:25:43,132][main][INFO] - ---
1086
+
1087
+
1088
+ [2025-10-31 14:25:43,136][main][INFO] - [T_total=123:06:22 | T_train=120:36:08] Start epoch 39
1089
+ [2025-10-31 17:30:32,200][main][INFO] - [T_total=126:11:11 | T_train=123:40:57 | T_epoch=03:04:49] End of epoch 39 (266640 steps) train loss 2134.48
1090
+ [2025-10-31 17:30:32,202][main][INFO] - [Epoch 39] All losses: [[diffusion=0.076013 ; kl=2.1342e+09 ; lpips=0.169987 ; repa=0.453857]]
1091
+ [2025-10-31 17:33:59,741][main][INFO] - [Epoch 40] Test metrics: [[MSE=24.76 | MAE=0.1132 | LPIPS=0.1528 | PSNR=16.06 | SSIM=0.4394 | dreamsim=0.2017 | FID=11.91]]
1092
+ [2025-10-31 17:33:59,743][main][INFO] - [Epoch 40] Best metrics: [[min_MSE=22.91 | min_MAE=0.1058 | min_LPIPS=0.1528 | max_PSNR=16.4 | max_SSIM=0.4394 | min_dreamsim=0.2017 | min_FID=11.91]]
1093
+ [2025-10-31 17:33:59,744][main][DEBUG] - Writing images to disk...
1094
+ [2025-10-31 17:34:00,581][main][DEBUG] - Image(s) saved on disk
1095
+ [2025-10-31 17:34:00,789][main][INFO] - End of epoch timers: [T_train=123:40:57 | T_epoch=03:04:49 | T_eval=02:18:52 | T_total=126:14:40]
1096
+ [2025-10-31 17:34:00,790][main][INFO] - Storing model checkpoint inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/last
1097
+ [2025-10-31 17:34:11,470][main][INFO] - Best FID so far, storing a copy of the model checkpoint to /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/best
1098
+ [2025-10-31 17:34:20,752][main][INFO] - ---
1099
+
1100
+
1101
+ [2025-10-31 17:34:20,753][main][INFO] - [T_total=126:15:00 | T_train=123:40:57] Start epoch 40
1102
+ [2025-10-31 20:39:19,195][main][INFO] - [T_total=129:19:58 | T_train=126:45:55 | T_epoch=03:04:58] End of epoch 40 (273306 steps) train loss 257946
1103
+ [2025-10-31 20:39:19,196][main][INFO] - [Epoch 40] All losses: [[diffusion=0.0760113 ; kl=2.57946e+11 ; lpips=0.170735 ; repa=0.454041]]
1104
+ [2025-10-31 20:42:46,266][main][INFO] - [Epoch 41] Test metrics: [[MSE=24.79 | MAE=0.1133 | LPIPS=0.1526 | PSNR=16.06 | SSIM=0.4406 | dreamsim=0.2009 | FID=11.78]]
1105
+ [2025-10-31 20:42:46,268][main][INFO] - [Epoch 41] Best metrics: [[min_MSE=22.91 | min_MAE=0.1058 | min_LPIPS=0.1526 | max_PSNR=16.4 | max_SSIM=0.4406 | min_dreamsim=0.2009 | min_FID=11.78]]
1106
+ [2025-10-31 20:42:46,269][main][DEBUG] - Writing images to disk...
1107
+ [2025-10-31 20:42:47,095][main][DEBUG] - Image(s) saved on disk
1108
+ [2025-10-31 20:42:47,343][main][INFO] - End of epoch timers: [T_train=126:45:55 | T_epoch=03:04:58 | T_eval=02:22:20 | T_total=129:23:26]
1109
+ [2025-10-31 20:42:47,345][main][INFO] - Storing model checkpoint inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/last
1110
+ [2025-10-31 20:42:58,982][main][INFO] - Best FID so far, storing a copy of the model checkpoint to /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/best
1111
+ [2025-10-31 20:43:08,773][main][INFO] - ---
1112
+
1113
+
1114
+ [2025-10-31 20:43:08,774][main][INFO] - [T_total=129:23:48 | T_train=126:45:55] Start epoch 41
1115
+ [2025-10-31 23:47:55,833][main][INFO] - [T_total=132:28:35 | T_train=129:50:42 | T_epoch=03:04:47] End of epoch 41 (279972 steps) train loss 14.3203
1116
+ [2025-10-31 23:47:55,839][main][INFO] - [Epoch 41] All losses: [[diffusion=0.0761964 ; kl=1.40445e+07 ; lpips=0.171818 ; repa=0.454763]]
1117
+ [2025-10-31 23:51:24,059][main][INFO] - [Epoch 42] Test metrics: [[MSE=24.9 | MAE=0.1136 | LPIPS=0.1528 | PSNR=16.04 | SSIM=0.4402 | dreamsim=0.2009 | FID=11.76]]
1118
+ [2025-10-31 23:51:24,061][main][INFO] - [Epoch 42] Best metrics: [[min_MSE=22.91 | min_MAE=0.1058 | min_LPIPS=0.1526 | max_PSNR=16.4 | max_SSIM=0.4406 | min_dreamsim=0.2009 | min_FID=11.76]]
1119
+ [2025-10-31 23:51:24,063][main][DEBUG] - Writing images to disk...
1120
+ [2025-10-31 23:51:24,900][main][DEBUG] - Image(s) saved on disk
1121
+ [2025-10-31 23:51:25,140][main][INFO] - End of epoch timers: [T_train=129:50:42 | T_epoch=03:04:47 | T_eval=02:25:49 | T_total=132:32:04]
1122
+ [2025-10-31 23:51:25,141][main][INFO] - Storing model checkpoint inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/last
1123
+ [2025-10-31 23:51:35,044][main][INFO] - Best FID so far, storing a copy of the model checkpoint to /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/best
1124
+ [2025-10-31 23:51:44,524][main][INFO] - ---
1125
+
1126
+
1127
+ [2025-10-31 23:51:44,525][main][INFO] - [T_total=132:32:24 | T_train=129:50:42] Start epoch 42
1128
+ [2025-11-01 02:57:08,828][main][INFO] - [T_total=135:37:48 | T_train=132:56:07 | T_epoch=03:05:24] End of epoch 42 (286638 steps) train loss 27710.1
1129
+ [2025-11-01 02:57:08,829][main][INFO] - [Epoch 42] All losses: [[diffusion=0.0763487 ; kl=2.77098e+10 ; lpips=0.172949 ; repa=0.455514]]
1130
+ [2025-11-01 03:00:36,341][main][INFO] - [Epoch 43] Test metrics: [[MSE=24.85 | MAE=0.1135 | LPIPS=0.1524 | PSNR=16.05 | SSIM=0.4412 | dreamsim=0.2003 | FID=11.67]]
1131
+ [2025-11-01 03:00:36,343][main][INFO] - [Epoch 43] Best metrics: [[min_MSE=22.91 | min_MAE=0.1058 | min_LPIPS=0.1524 | max_PSNR=16.4 | max_SSIM=0.4412 | min_dreamsim=0.2003 | min_FID=11.67]]
1132
+ [2025-11-01 03:00:36,344][main][DEBUG] - Writing images to disk...
1133
+ [2025-11-01 03:00:37,429][main][DEBUG] - Image(s) saved on disk
1134
+ [2025-11-01 03:00:37,669][main][INFO] - End of epoch timers: [T_train=132:56:07 | T_epoch=03:05:24 | T_eval=02:29:17 | T_total=135:41:17]
1135
+ [2025-11-01 03:00:37,671][main][INFO] - Storing model checkpoint inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/last
1136
+ [2025-11-01 03:00:48,523][main][INFO] - Best FID so far, storing a copy of the model checkpoint to /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/best
1137
+ [2025-11-01 03:00:59,238][main][INFO] - ---
1138
+
1139
+
1140
+ [2025-11-01 03:00:59,239][main][INFO] - [T_total=135:41:38 | T_train=132:56:07] Start epoch 43
1141
+ [2025-11-01 06:06:17,238][main][INFO] - [T_total=138:46:56 | T_train=136:01:25 | T_epoch=03:05:17] End of epoch 43 (293304 steps) train loss 321.888
1142
+ [2025-11-01 06:06:17,239][main][INFO] - [Epoch 43] All losses: [[diffusion=0.075849 ; kl=3.21614e+08 ; lpips=0.170029 ; repa=0.452597]]
1143
+ [2025-11-01 06:09:44,706][main][INFO] - [Epoch 44] Test metrics: [[MSE=24.88 | MAE=0.1137 | LPIPS=0.1522 | PSNR=16.04 | SSIM=0.4413 | dreamsim=0.1998 | FID=11.54]]
1144
+ [2025-11-01 06:09:44,708][main][INFO] - [Epoch 44] Best metrics: [[min_MSE=22.91 | min_MAE=0.1058 | min_LPIPS=0.1522 | max_PSNR=16.4 | max_SSIM=0.4413 | min_dreamsim=0.1998 | min_FID=11.54]]
1145
+ [2025-11-01 06:09:44,709][main][DEBUG] - Writing images to disk...
1146
+ [2025-11-01 06:09:45,799][main][DEBUG] - Image(s) saved on disk
1147
+ [2025-11-01 06:09:46,004][main][INFO] - End of epoch timers: [T_train=136:01:25 | T_epoch=03:05:17 | T_eval=02:32:46 | T_total=138:50:25]
1148
+ [2025-11-01 06:09:46,005][main][INFO] - Storing model checkpoint inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/last
1149
+ [2025-11-01 06:09:57,354][main][INFO] - Best FID so far, storing a copy of the model checkpoint to /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/checkpoints/best
1150
+ [2025-11-01 06:10:08,846][main][INFO] - ---
1151
+
1152
+
1153
+ [2025-11-01 06:10:08,847][main][INFO] - [T_total=138:50:48 | T_train=136:01:25] Start epoch 44
train_enc_dc_f32c32_EqM/tensorboard_logs/events.out.tfevents.1761477560.98629b852e50.63738.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ee5d832a2211ad3b54638b31a63f0844d071bf7cce753238c04aab283eb5f06
3
+ size 154206671
train_enc_dc_f32c32_FM/.hydra/config.yaml ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ seed: 0
2
+ task: train
3
+ runtime_path: ${hydra:runtime.cwd}
4
+ ckpt_dir: ${runtime_path}/runs
5
+ run_name: train_enc_dc_f32c32_FM
6
+ cache_dir: ${ckpt_dir}/cache
7
+ run_dir: ${ckpt_dir}/jobs/${run_name}
8
+ checkpoint_path: ${run_dir}/checkpoints
9
+ dataset:
10
+ imagenet_root: imagenet_data
11
+ im_size: 128
12
+ batch_size: 192
13
+ aug_scale: 2
14
+ limit: null
15
+ distill_teacher: false
16
+ dc_ssdae:
17
+ compile: false
18
+ checkpoint: null
19
+ encoder: f32c32
20
+ encoder_checkpoint: null
21
+ encoder_train: true
22
+ decoder: S
23
+ trainer_type: FM
24
+ encoder_type: dc
25
+ sampler:
26
+ steps: 10
27
+ ema:
28
+ decay: 0.999
29
+ start_iter: 50000
30
+ aux_losses:
31
+ compile: ${dc_ssdae.compile}
32
+ repa:
33
+ i_extract: 4
34
+ n_layers: 2
35
+ lpips: true
36
+ training:
37
+ sdpa_kernel: 2
38
+ mixed_precision: bf16
39
+ grad_accumulate: 1
40
+ grad_clip: 0.1
41
+ epochs: 20
42
+ eval_freq: 1
43
+ save_on_best: FID
44
+ log_freq: 100
45
+ lr: 0.0003
46
+ weight_decay: 0.001
47
+ losses:
48
+ diffusion: 1
49
+ repa: 0.25
50
+ lpips: 0.5
51
+ kl: 1.0e-06
52
+ show_samples: 8
train_enc_dc_f32c32_FM/.hydra/hydra.yaml ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ hydra:
2
+ run:
3
+ dir: ${run_dir}
4
+ sweep:
5
+ dir: ${run_dir}
6
+ subdir: multirun_${hydra:job.num}
7
+ launcher:
8
+ _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
9
+ sweeper:
10
+ _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
11
+ max_batch_size: null
12
+ params: null
13
+ help:
14
+ app_name: ${hydra.job.name}
15
+ header: '${hydra.help.app_name} is powered by Hydra.
16
+
17
+ '
18
+ footer: 'Powered by Hydra (https://hydra.cc)
19
+
20
+ Use --hydra-help to view Hydra specific help
21
+
22
+ '
23
+ template: '${hydra.help.header}
24
+
25
+ == Configuration groups ==
26
+
27
+ Compose your configuration from those groups (group=option)
28
+
29
+
30
+ $APP_CONFIG_GROUPS
31
+
32
+
33
+ == Config ==
34
+
35
+ Override anything in the config (foo.bar=value)
36
+
37
+
38
+ $CONFIG
39
+
40
+
41
+ ${hydra.help.footer}
42
+
43
+ '
44
+ hydra_help:
45
+ template: 'Hydra (${hydra.runtime.version})
46
+
47
+ See https://hydra.cc for more info.
48
+
49
+
50
+ == Flags ==
51
+
52
+ $FLAGS_HELP
53
+
54
+
55
+ == Configuration groups ==
56
+
57
+ Compose your configuration from those groups (For example, append hydra/job_logging=disabled
58
+ to command line)
59
+
60
+
61
+ $HYDRA_CONFIG_GROUPS
62
+
63
+
64
+ Use ''--cfg hydra'' to Show the Hydra config.
65
+
66
+ '
67
+ hydra_help: ???
68
+ hydra_logging:
69
+ version: 1
70
+ formatters:
71
+ simple:
72
+ format: '[%(asctime)s][HYDRA] %(message)s'
73
+ handlers:
74
+ console:
75
+ class: logging.StreamHandler
76
+ formatter: simple
77
+ stream: ext://sys.stdout
78
+ root:
79
+ level: INFO
80
+ handlers:
81
+ - console
82
+ loggers:
83
+ logging_example:
84
+ level: DEBUG
85
+ disable_existing_loggers: false
86
+ job_logging:
87
+ version: 1
88
+ formatters:
89
+ simple:
90
+ format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
91
+ colorlog:
92
+ (): colorlog.ColoredFormatter
93
+ format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s]
94
+ - %(message)s'
95
+ log_colors:
96
+ DEBUG: purple
97
+ INFO: green
98
+ WARNING: yellow
99
+ ERROR: red
100
+ CRITICAL: red
101
+ handlers:
102
+ console:
103
+ class: logging.StreamHandler
104
+ formatter: colorlog
105
+ stream: ext://sys.stdout
106
+ file:
107
+ class: logging.FileHandler
108
+ formatter: simple
109
+ filename: ${hydra:runtime.output_dir}/${hydra:job.name}.log
110
+ root:
111
+ level: INFO
112
+ handlers:
113
+ - console
114
+ - file
115
+ disable_existing_loggers: false
116
+ env: {}
117
+ mode: RUN
118
+ searchpath: []
119
+ callbacks: {}
120
+ output_subdir: .hydra
121
+ overrides:
122
+ hydra:
123
+ - hydra.mode=RUN
124
+ task:
125
+ - run_name=train_enc_dc_f32c32_FM
126
+ - dataset.im_size=128
127
+ - dataset.aug_scale=2
128
+ - training.epochs=20
129
+ - dc_ssdae.encoder_train=true
130
+ job:
131
+ name: main
132
+ chdir: null
133
+ override_dirname: dataset.aug_scale=2,dataset.im_size=128,dc_ssdae.encoder_train=true,run_name=train_enc_dc_f32c32_FM,training.epochs=20
134
+ id: ???
135
+ num: ???
136
+ config_name: dc_f32c32_FM
137
+ env_set: {}
138
+ env_copy: []
139
+ config:
140
+ override_dirname:
141
+ kv_sep: '='
142
+ item_sep: ','
143
+ exclude_keys: []
144
+ runtime:
145
+ version: 1.3.2
146
+ version_base: '1.3'
147
+ cwd: /workspace/DC_SSDAE
148
+ config_sources:
149
+ - path: hydra.conf
150
+ schema: pkg
151
+ provider: hydra
152
+ - path: /workspace/DC_SSDAE/config
153
+ schema: file
154
+ provider: main
155
+ - path: hydra_plugins.hydra_colorlog.conf
156
+ schema: pkg
157
+ provider: hydra-colorlog
158
+ - path: ''
159
+ schema: structured
160
+ provider: schema
161
+ output_dir: /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_FM
162
+ choices:
163
+ hydra/env: default
164
+ hydra/callbacks: null
165
+ hydra/job_logging: colorlog
166
+ hydra/hydra_logging: default
167
+ hydra/hydra_help: default
168
+ hydra/help: default
169
+ hydra/sweeper: basic
170
+ hydra/launcher: basic
171
+ hydra/output: default
172
+ verbose: false
train_enc_dc_f32c32_FM/.hydra/overrides.yaml ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ - run_name=train_enc_dc_f32c32_FM
2
+ - dataset.im_size=128
3
+ - dataset.aug_scale=2
4
+ - training.epochs=20
5
+ - dc_ssdae.encoder_train=true
train_enc_dc_f32c32_FM/checkpoints/best/custom_checkpoint_0.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b430d34a7b81d4c51cd8965a0e39388bddf8bb8708f6294838efaa318bd0e92
3
+ size 2293
train_enc_dc_f32c32_FM/checkpoints/best/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9f46cd46064e01d1f0ba96ebc959bf9ddd42fd6a73fa54a3adec8fd09e3f8f5
3
+ size 968466492
train_enc_dc_f32c32_FM/checkpoints/best/model_1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc4bf720565d4c6dd1f5efaf8781daeaf13e7e9dc62abf8ae61401660badf4a2
3
+ size 968466492
train_enc_dc_f32c32_FM/checkpoints/best/model_2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f22c0ac0a5c37e453f5fa4c25ec76fa5f961cb6d56f1c70a171f60154645aaa9
3
+ size 598032
train_enc_dc_f32c32_FM/checkpoints/best/model_ae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9f46cd46064e01d1f0ba96ebc959bf9ddd42fd6a73fa54a3adec8fd09e3f8f5
3
+ size 968466492
train_enc_dc_f32c32_FM/checkpoints/best/model_ae_ema.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc4bf720565d4c6dd1f5efaf8781daeaf13e7e9dc62abf8ae61401660badf4a2
3
+ size 968466492
train_enc_dc_f32c32_FM/checkpoints/best/model_aux_losses.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f22c0ac0a5c37e453f5fa4c25ec76fa5f961cb6d56f1c70a171f60154645aaa9
3
+ size 598032
train_enc_dc_f32c32_FM/checkpoints/best/optimizer.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:476824787533c6eb863844a3543bef24735cfeec45f2dc4d7e595082dce94312
3
+ size 1938294667
train_enc_dc_f32c32_FM/checkpoints/best/random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80fc16be4fd72852da9ace45f98ab693699fb8c0dd89cb63f6aca56101ab1f85
3
+ size 16449