Training in progress, step 19000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 36730224
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:31847bb52936d686e74359d3478c14923c1ebe168be3dabe55eeb97e094e4e4d
|
| 3 |
size 36730224
|
last-checkpoint/ar_diffusion_info.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1544
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a28811b9b6f21f7c3eb8dd1560ffdbc347e72ba020959bb8f5bc088d847bdc83
|
| 3 |
size 1544
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 73588346
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:40965b88e5eb22bf8d3caa10525eb8f13ed0f07bce8d46631079916b2bb29747
|
| 3 |
size 73588346
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3c57dd490464340c9b3aa212b9d3844b50cfd1f6b44323e804e87459d3ff333e
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dbc432474a75821e9b2dae49e65dbd5211090e2d95ceeb48957adde037f27b78
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:68cdaa34d552da65c658445f52e286a15f93a5093fe3daa82953914a2ad847cc
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"best_global_step":
|
| 3 |
-
"best_metric": 1.
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-15500",
|
| 5 |
-
"epoch": 1.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -3190,6 +3190,92 @@
|
|
| 3190 |
"eval_samples_per_second": 56.419,
|
| 3191 |
"eval_steps_per_second": 14.105,
|
| 3192 |
"step": 18500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3193 |
}
|
| 3194 |
],
|
| 3195 |
"logging_steps": 50,
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_global_step": 18750,
|
| 3 |
+
"best_metric": 1.4757392406463623,
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-15500",
|
| 5 |
+
"epoch": 1.4614260441504499,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 19000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 3190 |
"eval_samples_per_second": 56.419,
|
| 3191 |
"eval_steps_per_second": 14.105,
|
| 3192 |
"step": 18500
|
| 3193 |
+
},
|
| 3194 |
+
{
|
| 3195 |
+
"epoch": 1.42681332205215,
|
| 3196 |
+
"grad_norm": 1.0700381994247437,
|
| 3197 |
+
"learning_rate": 0.0001062982105290497,
|
| 3198 |
+
"loss": 1.3846,
|
| 3199 |
+
"step": 18550
|
| 3200 |
+
},
|
| 3201 |
+
{
|
| 3202 |
+
"epoch": 1.4306591800630721,
|
| 3203 |
+
"grad_norm": 1.2848351001739502,
|
| 3204 |
+
"learning_rate": 0.0001060384905072332,
|
| 3205 |
+
"loss": 1.4874,
|
| 3206 |
+
"step": 18600
|
| 3207 |
+
},
|
| 3208 |
+
{
|
| 3209 |
+
"epoch": 1.4345050380739943,
|
| 3210 |
+
"grad_norm": 1.4261386394500732,
|
| 3211 |
+
"learning_rate": 0.00010577877048541674,
|
| 3212 |
+
"loss": 1.5771,
|
| 3213 |
+
"step": 18650
|
| 3214 |
+
},
|
| 3215 |
+
{
|
| 3216 |
+
"epoch": 1.4383508960849165,
|
| 3217 |
+
"grad_norm": 1.3613426685333252,
|
| 3218 |
+
"learning_rate": 0.00010551905046360025,
|
| 3219 |
+
"loss": 1.4445,
|
| 3220 |
+
"step": 18700
|
| 3221 |
+
},
|
| 3222 |
+
{
|
| 3223 |
+
"epoch": 1.4421967540958387,
|
| 3224 |
+
"grad_norm": 0.8625685572624207,
|
| 3225 |
+
"learning_rate": 0.00010525933044178375,
|
| 3226 |
+
"loss": 1.3845,
|
| 3227 |
+
"step": 18750
|
| 3228 |
+
},
|
| 3229 |
+
{
|
| 3230 |
+
"epoch": 1.4421967540958387,
|
| 3231 |
+
"eval_loss": 1.4757392406463623,
|
| 3232 |
+
"eval_runtime": 17.8903,
|
| 3233 |
+
"eval_samples_per_second": 55.896,
|
| 3234 |
+
"eval_steps_per_second": 13.974,
|
| 3235 |
+
"step": 18750
|
| 3236 |
+
},
|
| 3237 |
+
{
|
| 3238 |
+
"epoch": 1.446042612106761,
|
| 3239 |
+
"grad_norm": 1.5575672388076782,
|
| 3240 |
+
"learning_rate": 0.00010499961041996729,
|
| 3241 |
+
"loss": 1.5387,
|
| 3242 |
+
"step": 18800
|
| 3243 |
+
},
|
| 3244 |
+
{
|
| 3245 |
+
"epoch": 1.4498884701176833,
|
| 3246 |
+
"grad_norm": 2.2173306941986084,
|
| 3247 |
+
"learning_rate": 0.0001047398903981508,
|
| 3248 |
+
"loss": 1.459,
|
| 3249 |
+
"step": 18850
|
| 3250 |
+
},
|
| 3251 |
+
{
|
| 3252 |
+
"epoch": 1.4537343281286055,
|
| 3253 |
+
"grad_norm": 1.904809832572937,
|
| 3254 |
+
"learning_rate": 0.00010448017037633431,
|
| 3255 |
+
"loss": 1.5229,
|
| 3256 |
+
"step": 18900
|
| 3257 |
+
},
|
| 3258 |
+
{
|
| 3259 |
+
"epoch": 1.4575801861395277,
|
| 3260 |
+
"grad_norm": 1.8832893371582031,
|
| 3261 |
+
"learning_rate": 0.00010422045035451784,
|
| 3262 |
+
"loss": 1.4093,
|
| 3263 |
+
"step": 18950
|
| 3264 |
+
},
|
| 3265 |
+
{
|
| 3266 |
+
"epoch": 1.4614260441504499,
|
| 3267 |
+
"grad_norm": 1.852971076965332,
|
| 3268 |
+
"learning_rate": 0.00010396073033270135,
|
| 3269 |
+
"loss": 1.389,
|
| 3270 |
+
"step": 19000
|
| 3271 |
+
},
|
| 3272 |
+
{
|
| 3273 |
+
"epoch": 1.4614260441504499,
|
| 3274 |
+
"eval_loss": 1.4788576364517212,
|
| 3275 |
+
"eval_runtime": 17.7751,
|
| 3276 |
+
"eval_samples_per_second": 56.258,
|
| 3277 |
+
"eval_steps_per_second": 14.065,
|
| 3278 |
+
"step": 19000
|
| 3279 |
}
|
| 3280 |
],
|
| 3281 |
"logging_steps": 50,
|