Training in progress, step 31500, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 36730224
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7dc19b8d0d5f9136639755d9316a3022b8c82a5289f75b8c2a154ea24a7ec37d
|
| 3 |
size 36730224
|
last-checkpoint/ar_diffusion_info.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1544
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:77557a289798db28dd4dc90cd32e22abe79a243a7d8657956d33bce4d7666e79
|
| 3 |
size 1544
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 73588346
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4263d1c23e4d38ddf0a4a29df3a3b188cb0c869be730f51b663c9aae88580c07
|
| 3 |
size 73588346
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:34336c82053d260113435e2b759b35a50c0eba98515a30dfe9d41e121f34b323
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d859bf97583170267785e1e8320d8e4f984a79e2c33679e5f98c773a6e6e145a
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d10804979bf6e76891746681a7665a759c1b00ee5b7dc26a2cd76065e4556d2b
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": 30000,
|
| 3 |
"best_metric": 0.9945911169052124,
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-30000",
|
| 5 |
-
"epoch": 2.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -5340,6 +5340,92 @@
|
|
| 5340 |
"eval_samples_per_second": 58.119,
|
| 5341 |
"eval_steps_per_second": 14.53,
|
| 5342 |
"step": 31000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5343 |
}
|
| 5344 |
],
|
| 5345 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": 30000,
|
| 3 |
"best_metric": 0.9945911169052124,
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-30000",
|
| 5 |
+
"epoch": 2.422890546881009,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 31500,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 5340 |
"eval_samples_per_second": 58.119,
|
| 5341 |
"eval_steps_per_second": 14.53,
|
| 5342 |
"step": 31000
|
| 5343 |
+
},
|
| 5344 |
+
{
|
| 5345 |
+
"epoch": 2.3882778247827092,
|
| 5346 |
+
"grad_norm": 0.8717153668403625,
|
| 5347 |
+
"learning_rate": 4.138898267667455e-05,
|
| 5348 |
+
"loss": 1.0038,
|
| 5349 |
+
"step": 31050
|
| 5350 |
+
},
|
| 5351 |
+
{
|
| 5352 |
+
"epoch": 2.392123682793631,
|
| 5353 |
+
"grad_norm": 1.007270097732544,
|
| 5354 |
+
"learning_rate": 4.112926265485807e-05,
|
| 5355 |
+
"loss": 0.9858,
|
| 5356 |
+
"step": 31100
|
| 5357 |
+
},
|
| 5358 |
+
{
|
| 5359 |
+
"epoch": 2.3959695408045536,
|
| 5360 |
+
"grad_norm": 1.9409807920455933,
|
| 5361 |
+
"learning_rate": 4.0869542633041587e-05,
|
| 5362 |
+
"loss": 1.004,
|
| 5363 |
+
"step": 31150
|
| 5364 |
+
},
|
| 5365 |
+
{
|
| 5366 |
+
"epoch": 2.3998153988154756,
|
| 5367 |
+
"grad_norm": 0.6027572154998779,
|
| 5368 |
+
"learning_rate": 4.06098226112251e-05,
|
| 5369 |
+
"loss": 0.9958,
|
| 5370 |
+
"step": 31200
|
| 5371 |
+
},
|
| 5372 |
+
{
|
| 5373 |
+
"epoch": 2.403661256826398,
|
| 5374 |
+
"grad_norm": 0.8274515867233276,
|
| 5375 |
+
"learning_rate": 4.035010258940862e-05,
|
| 5376 |
+
"loss": 0.9852,
|
| 5377 |
+
"step": 31250
|
| 5378 |
+
},
|
| 5379 |
+
{
|
| 5380 |
+
"epoch": 2.403661256826398,
|
| 5381 |
+
"eval_loss": 1.0001976490020752,
|
| 5382 |
+
"eval_runtime": 17.2233,
|
| 5383 |
+
"eval_samples_per_second": 58.061,
|
| 5384 |
+
"eval_steps_per_second": 14.515,
|
| 5385 |
+
"step": 31250
|
| 5386 |
+
},
|
| 5387 |
+
{
|
| 5388 |
+
"epoch": 2.4075071148373204,
|
| 5389 |
+
"grad_norm": 0.7238942384719849,
|
| 5390 |
+
"learning_rate": 4.009038256759214e-05,
|
| 5391 |
+
"loss": 1.0062,
|
| 5392 |
+
"step": 31300
|
| 5393 |
+
},
|
| 5394 |
+
{
|
| 5395 |
+
"epoch": 2.4113529728482423,
|
| 5396 |
+
"grad_norm": 0.8912849426269531,
|
| 5397 |
+
"learning_rate": 3.983066254577565e-05,
|
| 5398 |
+
"loss": 0.9802,
|
| 5399 |
+
"step": 31350
|
| 5400 |
+
},
|
| 5401 |
+
{
|
| 5402 |
+
"epoch": 2.4151988308591648,
|
| 5403 |
+
"grad_norm": 1.1922829151153564,
|
| 5404 |
+
"learning_rate": 3.957094252395918e-05,
|
| 5405 |
+
"loss": 1.0317,
|
| 5406 |
+
"step": 31400
|
| 5407 |
+
},
|
| 5408 |
+
{
|
| 5409 |
+
"epoch": 2.4190446888700867,
|
| 5410 |
+
"grad_norm": 1.3773999214172363,
|
| 5411 |
+
"learning_rate": 3.931122250214269e-05,
|
| 5412 |
+
"loss": 0.9773,
|
| 5413 |
+
"step": 31450
|
| 5414 |
+
},
|
| 5415 |
+
{
|
| 5416 |
+
"epoch": 2.422890546881009,
|
| 5417 |
+
"grad_norm": 1.0747745037078857,
|
| 5418 |
+
"learning_rate": 3.905150248032621e-05,
|
| 5419 |
+
"loss": 1.0208,
|
| 5420 |
+
"step": 31500
|
| 5421 |
+
},
|
| 5422 |
+
{
|
| 5423 |
+
"epoch": 2.422890546881009,
|
| 5424 |
+
"eval_loss": 1.0009056329727173,
|
| 5425 |
+
"eval_runtime": 17.1069,
|
| 5426 |
+
"eval_samples_per_second": 58.456,
|
| 5427 |
+
"eval_steps_per_second": 14.614,
|
| 5428 |
+
"step": 31500
|
| 5429 |
}
|
| 5430 |
],
|
| 5431 |
"logging_steps": 50,
|