Training in progress, step 248, checkpoint
Browse files- last-checkpoint/global_step248/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step248/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step248/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step248/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step248/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step248/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step248/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step248/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/model-00001-of-00003.safetensors +1 -1
- last-checkpoint/model-00002-of-00003.safetensors +1 -1
- last-checkpoint/model-00003-of-00003.safetensors +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +108 -2
last-checkpoint/global_step248/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5b08794cdafca4a2405e677b4792eeb1c4002d519acd0473d1c10e1aa565d567
|
3 |
+
size 14483467880
|
last-checkpoint/global_step248/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d75109bb6240b53ff9e71157b60bdbb1c7d170e52510b3c30529adc92679d162
|
3 |
+
size 14483467880
|
last-checkpoint/global_step248/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3dfede4f8422dc2803797167730273a781c8af94b515f7db405e151010938e66
|
3 |
+
size 14483467880
|
last-checkpoint/global_step248/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9c1c917c53e44b8ef80c8cec62dc0938ce8ae27a5e900bb3f07ad29001135cdb
|
3 |
+
size 14483467880
|
last-checkpoint/global_step248/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b12a4cac7a1cb413656ed2b84b87dac9dd2b5c9457a0a6b56887bb9e066fcf64
|
3 |
+
size 150629
|
last-checkpoint/global_step248/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0d41d0da714359fbf813c0a5eb00cf5143b1943361a731e52e40897f07173521
|
3 |
+
size 150629
|
last-checkpoint/global_step248/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1c98308f6801467dcbda81513e0a255846ff8d2d0d1ca1b0b62562138abe897d
|
3 |
+
size 150629
|
last-checkpoint/global_step248/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:41447a8be1f5647c7283a2d60028ccc22a8c7b8d8d6373e6bc59d2818a31c0dc
|
3 |
+
size 150629
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step248
|
last-checkpoint/model-00001-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4943162336
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aa70bb996156bbded8548797ca1f4ee1575a045cddb2a9cc7df4062722aeb4c9
|
3 |
size 4943162336
|
last-checkpoint/model-00002-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4999819336
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:222edc16474f6eae34b4b7b14958aa7eed9b3da9527638f098fa5434ee77df8f
|
3 |
size 4999819336
|
last-checkpoint/model-00003-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4540516344
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3bb1df50fb89ebcc0fba450c7354cf3779333974ca8c122499541771dad338b0
|
3 |
size 4540516344
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:24e15ff45212eaf6c8decc16dc22c41c4ce080a39ad2deb8865f5b3888d4efb2
|
3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0766646143405ae7d8761387ef336c681649fc67c22c6b6fd693a70b83271308
|
3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:705dd3e24877a6ecb456032edc7da887e9e915c7d513964cecbec240a9a9513b
|
3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4bd4f44e10fc7beca1269b92451cc181ceccf15015c11f280a598b7ca4e6b5fa
|
3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c67f7ec77e804e39476b9d19eb7621ae6832d1f14416eb0651e4d7ceac525a87
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 62,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -340,6 +340,112 @@
|
|
340 |
"eval_samples_per_second": 5.484,
|
341 |
"eval_steps_per_second": 0.356,
|
342 |
"step": 186
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
343 |
}
|
344 |
],
|
345 |
"logging_steps": 10,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.7936,
|
5 |
"eval_steps": 62,
|
6 |
+
"global_step": 248,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
340 |
"eval_samples_per_second": 5.484,
|
341 |
"eval_steps_per_second": 0.356,
|
342 |
"step": 186
|
343 |
+
},
|
344 |
+
{
|
345 |
+
"epoch": 0.608,
|
346 |
+
"grad_norm": 86.7511219785863,
|
347 |
+
"learning_rate": 4.429928741092636e-07,
|
348 |
+
"logits/generated": -2.4148917198181152,
|
349 |
+
"logits/real": -2.3717541694641113,
|
350 |
+
"logps/generated": -118.5252685546875,
|
351 |
+
"logps/real": -148.78070068359375,
|
352 |
+
"loss": 0.7774,
|
353 |
+
"rewards/accuracies": 0.75,
|
354 |
+
"rewards/generated": 0.8947796821594238,
|
355 |
+
"rewards/margins": 0.7275075912475586,
|
356 |
+
"rewards/real": 1.6222871541976929,
|
357 |
+
"step": 190
|
358 |
+
},
|
359 |
+
{
|
360 |
+
"epoch": 0.64,
|
361 |
+
"grad_norm": 81.51532186606975,
|
362 |
+
"learning_rate": 4.3705463182897863e-07,
|
363 |
+
"logits/generated": -2.333613872528076,
|
364 |
+
"logits/real": -2.317903757095337,
|
365 |
+
"logps/generated": -106.9345703125,
|
366 |
+
"logps/real": -122.63179779052734,
|
367 |
+
"loss": 0.7231,
|
368 |
+
"rewards/accuracies": 0.7250000238418579,
|
369 |
+
"rewards/generated": 1.007331132888794,
|
370 |
+
"rewards/margins": 0.8109349012374878,
|
371 |
+
"rewards/real": 1.8182659149169922,
|
372 |
+
"step": 200
|
373 |
+
},
|
374 |
+
{
|
375 |
+
"epoch": 0.672,
|
376 |
+
"grad_norm": 66.81550093382229,
|
377 |
+
"learning_rate": 4.311163895486936e-07,
|
378 |
+
"logits/generated": -2.3513429164886475,
|
379 |
+
"logits/real": -2.4845714569091797,
|
380 |
+
"logps/generated": -115.06642150878906,
|
381 |
+
"logps/real": -125.45219421386719,
|
382 |
+
"loss": 0.7643,
|
383 |
+
"rewards/accuracies": 0.625,
|
384 |
+
"rewards/generated": 1.3189352750778198,
|
385 |
+
"rewards/margins": 0.4965497851371765,
|
386 |
+
"rewards/real": 1.8154850006103516,
|
387 |
+
"step": 210
|
388 |
+
},
|
389 |
+
{
|
390 |
+
"epoch": 0.704,
|
391 |
+
"grad_norm": 69.67950191082039,
|
392 |
+
"learning_rate": 4.251781472684085e-07,
|
393 |
+
"logits/generated": -2.458786725997925,
|
394 |
+
"logits/real": -2.4684672355651855,
|
395 |
+
"logps/generated": -104.95294189453125,
|
396 |
+
"logps/real": -110.86625671386719,
|
397 |
+
"loss": 0.7933,
|
398 |
+
"rewards/accuracies": 0.6000000238418579,
|
399 |
+
"rewards/generated": 1.5485458374023438,
|
400 |
+
"rewards/margins": 0.44912296533584595,
|
401 |
+
"rewards/real": 1.9976688623428345,
|
402 |
+
"step": 220
|
403 |
+
},
|
404 |
+
{
|
405 |
+
"epoch": 0.736,
|
406 |
+
"grad_norm": 67.77084563505426,
|
407 |
+
"learning_rate": 4.192399049881235e-07,
|
408 |
+
"logits/generated": -2.4709246158599854,
|
409 |
+
"logits/real": -2.4637975692749023,
|
410 |
+
"logps/generated": -109.6999282836914,
|
411 |
+
"logps/real": -118.01756286621094,
|
412 |
+
"loss": 0.7679,
|
413 |
+
"rewards/accuracies": 0.8125,
|
414 |
+
"rewards/generated": 1.0844991207122803,
|
415 |
+
"rewards/margins": 0.8938215374946594,
|
416 |
+
"rewards/real": 1.978320837020874,
|
417 |
+
"step": 230
|
418 |
+
},
|
419 |
+
{
|
420 |
+
"epoch": 0.768,
|
421 |
+
"grad_norm": 72.48546532520147,
|
422 |
+
"learning_rate": 4.1330166270783846e-07,
|
423 |
+
"logits/generated": -2.4811666011810303,
|
424 |
+
"logits/real": -2.499788284301758,
|
425 |
+
"logps/generated": -121.445068359375,
|
426 |
+
"logps/real": -134.60311889648438,
|
427 |
+
"loss": 0.7502,
|
428 |
+
"rewards/accuracies": 0.6499999761581421,
|
429 |
+
"rewards/generated": 1.3976621627807617,
|
430 |
+
"rewards/margins": 0.5193124413490295,
|
431 |
+
"rewards/real": 1.916974425315857,
|
432 |
+
"step": 240
|
433 |
+
},
|
434 |
+
{
|
435 |
+
"epoch": 0.7936,
|
436 |
+
"eval_logits/generated": -2.5031919479370117,
|
437 |
+
"eval_logits/real": -2.5135350227355957,
|
438 |
+
"eval_logps/generated": -111.80171966552734,
|
439 |
+
"eval_logps/real": -123.52568054199219,
|
440 |
+
"eval_loss": 0.7087541222572327,
|
441 |
+
"eval_rewards/accuracies": 0.6346153616905212,
|
442 |
+
"eval_rewards/generated": 0.9513813257217407,
|
443 |
+
"eval_rewards/margins": 0.5874653458595276,
|
444 |
+
"eval_rewards/real": 1.538846731185913,
|
445 |
+
"eval_runtime": 37.1383,
|
446 |
+
"eval_samples_per_second": 5.385,
|
447 |
+
"eval_steps_per_second": 0.35,
|
448 |
+
"step": 248
|
449 |
}
|
450 |
],
|
451 |
"logging_steps": 10,
|