cat-searcher
commited on
Commit
•
9f1bdac
1
Parent(s):
8f3a603
Training in progress, epoch 28, checkpoint
Browse files- last-checkpoint/global_step5718/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5718/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5718/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5718/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5718/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5718/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5718/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5718/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5718/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step5718/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step5718/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step5718/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step5718/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step5718/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step5718/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step5718/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/model-00001-of-00002.safetensors +1 -1
- last-checkpoint/model-00002-of-00002.safetensors +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +287 -2
last-checkpoint/global_step5718/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bd38c8d53cef6a27f96f7b421d3d5acb19ce05f445d6d62035a1e97e79f3f627
|
3 |
+
size 2506176112
|
last-checkpoint/global_step5718/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4c89a6cfa9861549dfcd1590200365ce0c25a15d698599c5baa2997fa1792463
|
3 |
+
size 2506176112
|
last-checkpoint/global_step5718/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4fdba19db1d418e7b2ee177ec5fe1c0e2ccf008a0275c8f937268506bce9f05d
|
3 |
+
size 2506176112
|
last-checkpoint/global_step5718/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:42cee2fd79cd1d6b6c3a0ee111ad23945fdc848b6f088f9fc721f86c321c5fbc
|
3 |
+
size 2506176112
|
last-checkpoint/global_step5718/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1f93875499f8ce0120e6376255ad48ae9397e785acb633fde389a6b2ac49e11c
|
3 |
+
size 2506176112
|
last-checkpoint/global_step5718/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8b49d8c24ca0c52538af8feffe99d3278097e67a82b3dfb164a19abf75c1c5dd
|
3 |
+
size 2506176112
|
last-checkpoint/global_step5718/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8e093b6caed17faa9268d407698a34ff3a5353d8f6b104dada895df571c3a25e
|
3 |
+
size 2506176112
|
last-checkpoint/global_step5718/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:74f30ead704a9cc1b7b066f4a45d9736525415f9a92a8c31f4bac15e7e7b9763
|
3 |
+
size 2506176112
|
last-checkpoint/global_step5718/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8252e479868a5e0a4dfa749f80a1e8d524dd4c975c140119cfbd5abd9bf8afec
|
3 |
+
size 85570
|
last-checkpoint/global_step5718/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:feeb32706d269794adf5b59514242cb1708a15909b313f060f2aef30bae63262
|
3 |
+
size 85506
|
last-checkpoint/global_step5718/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:335cd955b91a12c7f066b1d5d2dbb9ecb39d21ac0b6de5e5b7fe280032ce5a7e
|
3 |
+
size 85506
|
last-checkpoint/global_step5718/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:db19ca119d186937dfec156f64ed5185ab10005181a4f3d860a9d55af32a8feb
|
3 |
+
size 85506
|
last-checkpoint/global_step5718/zero_pp_rank_4_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:14a134d7d307b5c30e4ad6723ea24295f2a5c589def33cb8ff5981d5b9632b0b
|
3 |
+
size 85506
|
last-checkpoint/global_step5718/zero_pp_rank_5_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5101f24995338e1d56a12ada0fbc5ad6e72c43db79ad0c51f12b9255a2075ca0
|
3 |
+
size 85506
|
last-checkpoint/global_step5718/zero_pp_rank_6_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6fdccfa685649f28110d93e0604078495d43dc0a91bcf95d6794dfa8a6409447
|
3 |
+
size 85506
|
last-checkpoint/global_step5718/zero_pp_rank_7_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:38dcdf96ca2a336a1bbe49a3b62871afbdf6b077d44e4602872b45e38fcfb1b4
|
3 |
+
size 85506
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step5718
|
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4945242264
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cfa3379a77e192ab0af64fe78334db958cf6214addb3b4fbbc67569fb9f2e836
|
3 |
size 4945242264
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 67121608
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c217d7738abb0675dccca5614343dba054b625a983adb5b66bb33a2cf128b5c4
|
3 |
size 67121608
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a205f64c86d241517037857e791fc7cdcfd1b2d8a26ca46ff4e6430fc9491c64
|
3 |
size 15984
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:384e04a3f342b13aa2aff82b03d67994ff48a7b6e7d90ad53291b0ccf1124755
|
3 |
size 15984
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c75af15b2dde4bfa82f45cc3b48588a123bca20dd4b565a0312d5c1198d8bca9
|
3 |
size 15984
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8162e1d617d2045c999376967bb33455e9055c2882d00e9e6c3e5639106c7cf2
|
3 |
size 15984
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:37df018beaaf1bcb0a0451dcfd03f257c34b245315c9d5243bad309614abb972
|
3 |
size 15984
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bc45f95eb705f82fae244f5f5bb1d1d060492c8b55aba2aa58162687e28952bc
|
3 |
size 15984
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d8ed548560db42b0406ce39f0d4bce5fac64c200cc3ab9248f1dd703dde9dfef
|
3 |
size 15984
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:865e7aa49ec74e2e8e5ffbc2b62c9edb6308476119fe3e77f2fe29961dd5deaf
|
3 |
size 15984
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3921aadd9f9af170d702817ed4b4c15515108ff8717773c58f7ba3567a43d1be
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 28.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -8302,6 +8302,291 @@
|
|
8302 |
"rewards/margins": 0.5627579689025879,
|
8303 |
"rewards/rejected": -0.3508843779563904,
|
8304 |
"step": 5520
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8305 |
}
|
8306 |
],
|
8307 |
"logging_steps": 10,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 28.99746835443038,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 5718,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
8302 |
"rewards/margins": 0.5627579689025879,
|
8303 |
"rewards/rejected": -0.3508843779563904,
|
8304 |
"step": 5520
|
8305 |
+
},
|
8306 |
+
{
|
8307 |
+
"epoch": 28.045569620253165,
|
8308 |
+
"grad_norm": 221196.22582529782,
|
8309 |
+
"learning_rate": 1.2237543089940458e-07,
|
8310 |
+
"logits/chosen": -2.0594754219055176,
|
8311 |
+
"logits/rejected": -0.8701013326644897,
|
8312 |
+
"logps/chosen": -27.682659149169922,
|
8313 |
+
"logps/rejected": -595.9862060546875,
|
8314 |
+
"loss": 12458.1609,
|
8315 |
+
"rewards/accuracies": 1.0,
|
8316 |
+
"rewards/chosen": 0.21054939925670624,
|
8317 |
+
"rewards/margins": 0.5732256174087524,
|
8318 |
+
"rewards/rejected": -0.362676203250885,
|
8319 |
+
"step": 5530
|
8320 |
+
},
|
8321 |
+
{
|
8322 |
+
"epoch": 28.09620253164557,
|
8323 |
+
"grad_norm": 270569.55775822,
|
8324 |
+
"learning_rate": 1.2159197743654026e-07,
|
8325 |
+
"logits/chosen": -1.7393264770507812,
|
8326 |
+
"logits/rejected": -1.1281194686889648,
|
8327 |
+
"logps/chosen": -24.0075626373291,
|
8328 |
+
"logps/rejected": -583.3763427734375,
|
8329 |
+
"loss": 12198.0859,
|
8330 |
+
"rewards/accuracies": 1.0,
|
8331 |
+
"rewards/chosen": 0.20713207125663757,
|
8332 |
+
"rewards/margins": 0.5608252286911011,
|
8333 |
+
"rewards/rejected": -0.35369327664375305,
|
8334 |
+
"step": 5540
|
8335 |
+
},
|
8336 |
+
{
|
8337 |
+
"epoch": 28.146835443037975,
|
8338 |
+
"grad_norm": 294202.9420634267,
|
8339 |
+
"learning_rate": 1.2080852397367596e-07,
|
8340 |
+
"logits/chosen": -1.1881496906280518,
|
8341 |
+
"logits/rejected": -1.9830278158187866,
|
8342 |
+
"logps/chosen": -27.52215003967285,
|
8343 |
+
"logps/rejected": -571.5521240234375,
|
8344 |
+
"loss": 12035.3297,
|
8345 |
+
"rewards/accuracies": 0.9750000238418579,
|
8346 |
+
"rewards/chosen": 0.198753222823143,
|
8347 |
+
"rewards/margins": 0.5402897596359253,
|
8348 |
+
"rewards/rejected": -0.34153658151626587,
|
8349 |
+
"step": 5550
|
8350 |
+
},
|
8351 |
+
{
|
8352 |
+
"epoch": 28.19746835443038,
|
8353 |
+
"grad_norm": 250256.82251298483,
|
8354 |
+
"learning_rate": 1.2002507051081164e-07,
|
8355 |
+
"logits/chosen": -2.069603443145752,
|
8356 |
+
"logits/rejected": -1.1806148290634155,
|
8357 |
+
"logps/chosen": -32.80065155029297,
|
8358 |
+
"logps/rejected": -584.74169921875,
|
8359 |
+
"loss": 12394.9164,
|
8360 |
+
"rewards/accuracies": 1.0,
|
8361 |
+
"rewards/chosen": 0.21380552649497986,
|
8362 |
+
"rewards/margins": 0.5591468811035156,
|
8363 |
+
"rewards/rejected": -0.345341295003891,
|
8364 |
+
"step": 5560
|
8365 |
+
},
|
8366 |
+
{
|
8367 |
+
"epoch": 28.248101265822786,
|
8368 |
+
"grad_norm": 295332.47087175207,
|
8369 |
+
"learning_rate": 1.1924161704794735e-07,
|
8370 |
+
"logits/chosen": -1.0471255779266357,
|
8371 |
+
"logits/rejected": -0.4857943654060364,
|
8372 |
+
"logps/chosen": -21.058979034423828,
|
8373 |
+
"logps/rejected": -572.4304809570312,
|
8374 |
+
"loss": 12514.1922,
|
8375 |
+
"rewards/accuracies": 0.987500011920929,
|
8376 |
+
"rewards/chosen": 0.1976846158504486,
|
8377 |
+
"rewards/margins": 0.548615574836731,
|
8378 |
+
"rewards/rejected": -0.35093095898628235,
|
8379 |
+
"step": 5570
|
8380 |
+
},
|
8381 |
+
{
|
8382 |
+
"epoch": 28.29873417721519,
|
8383 |
+
"grad_norm": 206143.0104728106,
|
8384 |
+
"learning_rate": 1.1845816358508304e-07,
|
8385 |
+
"logits/chosen": -2.556028366088867,
|
8386 |
+
"logits/rejected": -1.9551620483398438,
|
8387 |
+
"logps/chosen": -35.4798698425293,
|
8388 |
+
"logps/rejected": -586.085693359375,
|
8389 |
+
"loss": 12969.0906,
|
8390 |
+
"rewards/accuracies": 0.987500011920929,
|
8391 |
+
"rewards/chosen": 0.20620682835578918,
|
8392 |
+
"rewards/margins": 0.5480517148971558,
|
8393 |
+
"rewards/rejected": -0.34184494614601135,
|
8394 |
+
"step": 5580
|
8395 |
+
},
|
8396 |
+
{
|
8397 |
+
"epoch": 28.349367088607593,
|
8398 |
+
"grad_norm": 264961.57508088043,
|
8399 |
+
"learning_rate": 1.1767471012221873e-07,
|
8400 |
+
"logits/chosen": -1.3203023672103882,
|
8401 |
+
"logits/rejected": -0.41819173097610474,
|
8402 |
+
"logps/chosen": -35.34379959106445,
|
8403 |
+
"logps/rejected": -582.7780151367188,
|
8404 |
+
"loss": 11659.4484,
|
8405 |
+
"rewards/accuracies": 0.987500011920929,
|
8406 |
+
"rewards/chosen": 0.21271836757659912,
|
8407 |
+
"rewards/margins": 0.5540838837623596,
|
8408 |
+
"rewards/rejected": -0.3413654863834381,
|
8409 |
+
"step": 5590
|
8410 |
+
},
|
8411 |
+
{
|
8412 |
+
"epoch": 28.4,
|
8413 |
+
"grad_norm": 475056.8558156038,
|
8414 |
+
"learning_rate": 1.1689125665935443e-07,
|
8415 |
+
"logits/chosen": 0.2806483507156372,
|
8416 |
+
"logits/rejected": 0.8025129437446594,
|
8417 |
+
"logps/chosen": -32.23934555053711,
|
8418 |
+
"logps/rejected": -571.2871704101562,
|
8419 |
+
"loss": 12715.6078,
|
8420 |
+
"rewards/accuracies": 0.987500011920929,
|
8421 |
+
"rewards/chosen": 0.20215098559856415,
|
8422 |
+
"rewards/margins": 0.5371149778366089,
|
8423 |
+
"rewards/rejected": -0.33496397733688354,
|
8424 |
+
"step": 5600
|
8425 |
+
},
|
8426 |
+
{
|
8427 |
+
"epoch": 28.450632911392404,
|
8428 |
+
"grad_norm": 197134.08463352287,
|
8429 |
+
"learning_rate": 1.1610780319649012e-07,
|
8430 |
+
"logits/chosen": -0.42449599504470825,
|
8431 |
+
"logits/rejected": 0.20755800604820251,
|
8432 |
+
"logps/chosen": -30.612218856811523,
|
8433 |
+
"logps/rejected": -578.2881469726562,
|
8434 |
+
"loss": 12352.6281,
|
8435 |
+
"rewards/accuracies": 0.987500011920929,
|
8436 |
+
"rewards/chosen": 0.2100987732410431,
|
8437 |
+
"rewards/margins": 0.5525861978530884,
|
8438 |
+
"rewards/rejected": -0.34248748421669006,
|
8439 |
+
"step": 5610
|
8440 |
+
},
|
8441 |
+
{
|
8442 |
+
"epoch": 28.50126582278481,
|
8443 |
+
"grad_norm": 345771.22083863505,
|
8444 |
+
"learning_rate": 1.1532434973362581e-07,
|
8445 |
+
"logits/chosen": -1.5660457611083984,
|
8446 |
+
"logits/rejected": -0.7327693700790405,
|
8447 |
+
"logps/chosen": -22.58321762084961,
|
8448 |
+
"logps/rejected": -566.7658081054688,
|
8449 |
+
"loss": 11851.4547,
|
8450 |
+
"rewards/accuracies": 0.987500011920929,
|
8451 |
+
"rewards/chosen": 0.19859129190444946,
|
8452 |
+
"rewards/margins": 0.5416545271873474,
|
8453 |
+
"rewards/rejected": -0.3430632948875427,
|
8454 |
+
"step": 5620
|
8455 |
+
},
|
8456 |
+
{
|
8457 |
+
"epoch": 28.551898734177215,
|
8458 |
+
"grad_norm": 168585.12783774585,
|
8459 |
+
"learning_rate": 1.145408962707615e-07,
|
8460 |
+
"logits/chosen": -0.2972283363342285,
|
8461 |
+
"logits/rejected": -0.3674158453941345,
|
8462 |
+
"logps/chosen": -29.131546020507812,
|
8463 |
+
"logps/rejected": -597.9761962890625,
|
8464 |
+
"loss": 11512.6836,
|
8465 |
+
"rewards/accuracies": 1.0,
|
8466 |
+
"rewards/chosen": 0.21312180161476135,
|
8467 |
+
"rewards/margins": 0.5691269040107727,
|
8468 |
+
"rewards/rejected": -0.35600510239601135,
|
8469 |
+
"step": 5630
|
8470 |
+
},
|
8471 |
+
{
|
8472 |
+
"epoch": 28.60253164556962,
|
8473 |
+
"grad_norm": 208909.02036407648,
|
8474 |
+
"learning_rate": 1.137574428078972e-07,
|
8475 |
+
"logits/chosen": -1.0681560039520264,
|
8476 |
+
"logits/rejected": -0.39094457030296326,
|
8477 |
+
"logps/chosen": -36.012596130371094,
|
8478 |
+
"logps/rejected": -586.0516357421875,
|
8479 |
+
"loss": 12808.1297,
|
8480 |
+
"rewards/accuracies": 0.987500011920929,
|
8481 |
+
"rewards/chosen": 0.20493356883525848,
|
8482 |
+
"rewards/margins": 0.5560083389282227,
|
8483 |
+
"rewards/rejected": -0.3510746955871582,
|
8484 |
+
"step": 5640
|
8485 |
+
},
|
8486 |
+
{
|
8487 |
+
"epoch": 28.653164556962025,
|
8488 |
+
"grad_norm": 311846.42372199957,
|
8489 |
+
"learning_rate": 1.1297398934503289e-07,
|
8490 |
+
"logits/chosen": 0.5105953216552734,
|
8491 |
+
"logits/rejected": 1.009169101715088,
|
8492 |
+
"logps/chosen": -27.007156372070312,
|
8493 |
+
"logps/rejected": -599.7515258789062,
|
8494 |
+
"loss": 11991.1812,
|
8495 |
+
"rewards/accuracies": 1.0,
|
8496 |
+
"rewards/chosen": 0.21386167407035828,
|
8497 |
+
"rewards/margins": 0.572094202041626,
|
8498 |
+
"rewards/rejected": -0.3582325577735901,
|
8499 |
+
"step": 5650
|
8500 |
+
},
|
8501 |
+
{
|
8502 |
+
"epoch": 28.70379746835443,
|
8503 |
+
"grad_norm": 268717.3291778612,
|
8504 |
+
"learning_rate": 1.1219053588216858e-07,
|
8505 |
+
"logits/chosen": -0.8255645036697388,
|
8506 |
+
"logits/rejected": -0.8527682423591614,
|
8507 |
+
"logps/chosen": -21.579692840576172,
|
8508 |
+
"logps/rejected": -585.7736206054688,
|
8509 |
+
"loss": 12543.1672,
|
8510 |
+
"rewards/accuracies": 1.0,
|
8511 |
+
"rewards/chosen": 0.2072766274213791,
|
8512 |
+
"rewards/margins": 0.5643941760063171,
|
8513 |
+
"rewards/rejected": -0.35711759328842163,
|
8514 |
+
"step": 5660
|
8515 |
+
},
|
8516 |
+
{
|
8517 |
+
"epoch": 28.754430379746836,
|
8518 |
+
"grad_norm": 251846.43966430755,
|
8519 |
+
"learning_rate": 1.1140708241930429e-07,
|
8520 |
+
"logits/chosen": -1.6031732559204102,
|
8521 |
+
"logits/rejected": -0.6178330779075623,
|
8522 |
+
"logps/chosen": -30.777385711669922,
|
8523 |
+
"logps/rejected": -578.4786376953125,
|
8524 |
+
"loss": 12538.3711,
|
8525 |
+
"rewards/accuracies": 0.987500011920929,
|
8526 |
+
"rewards/chosen": 0.20804066956043243,
|
8527 |
+
"rewards/margins": 0.5539838075637817,
|
8528 |
+
"rewards/rejected": -0.3459431827068329,
|
8529 |
+
"step": 5670
|
8530 |
+
},
|
8531 |
+
{
|
8532 |
+
"epoch": 28.80506329113924,
|
8533 |
+
"grad_norm": 362284.87054599967,
|
8534 |
+
"learning_rate": 1.1062362895643998e-07,
|
8535 |
+
"logits/chosen": -0.6538245677947998,
|
8536 |
+
"logits/rejected": -0.3702305555343628,
|
8537 |
+
"logps/chosen": -26.261932373046875,
|
8538 |
+
"logps/rejected": -577.5454711914062,
|
8539 |
+
"loss": 13020.9297,
|
8540 |
+
"rewards/accuracies": 1.0,
|
8541 |
+
"rewards/chosen": 0.20354709029197693,
|
8542 |
+
"rewards/margins": 0.5529359579086304,
|
8543 |
+
"rewards/rejected": -0.34938886761665344,
|
8544 |
+
"step": 5680
|
8545 |
+
},
|
8546 |
+
{
|
8547 |
+
"epoch": 28.855696202531647,
|
8548 |
+
"grad_norm": 205761.31564795828,
|
8549 |
+
"learning_rate": 1.0984017549357568e-07,
|
8550 |
+
"logits/chosen": -2.4821999073028564,
|
8551 |
+
"logits/rejected": -2.7491514682769775,
|
8552 |
+
"logps/chosen": -33.985538482666016,
|
8553 |
+
"logps/rejected": -588.4347534179688,
|
8554 |
+
"loss": 12498.5609,
|
8555 |
+
"rewards/accuracies": 1.0,
|
8556 |
+
"rewards/chosen": 0.2070481777191162,
|
8557 |
+
"rewards/margins": 0.5542899370193481,
|
8558 |
+
"rewards/rejected": -0.34724172949790955,
|
8559 |
+
"step": 5690
|
8560 |
+
},
|
8561 |
+
{
|
8562 |
+
"epoch": 28.906329113924052,
|
8563 |
+
"grad_norm": 269249.39255954395,
|
8564 |
+
"learning_rate": 1.0905672203071137e-07,
|
8565 |
+
"logits/chosen": 0.24643035233020782,
|
8566 |
+
"logits/rejected": 0.39688020944595337,
|
8567 |
+
"logps/chosen": -22.365093231201172,
|
8568 |
+
"logps/rejected": -572.4009399414062,
|
8569 |
+
"loss": 12145.2008,
|
8570 |
+
"rewards/accuracies": 1.0,
|
8571 |
+
"rewards/chosen": 0.20018497109413147,
|
8572 |
+
"rewards/margins": 0.5481060147285461,
|
8573 |
+
"rewards/rejected": -0.3479210138320923,
|
8574 |
+
"step": 5700
|
8575 |
+
},
|
8576 |
+
{
|
8577 |
+
"epoch": 28.956962025316457,
|
8578 |
+
"grad_norm": 274939.7399900873,
|
8579 |
+
"learning_rate": 1.0827326856784706e-07,
|
8580 |
+
"logits/chosen": 0.24328431487083435,
|
8581 |
+
"logits/rejected": 0.056040357798337936,
|
8582 |
+
"logps/chosen": -27.859899520874023,
|
8583 |
+
"logps/rejected": -582.8697509765625,
|
8584 |
+
"loss": 12038.6203,
|
8585 |
+
"rewards/accuracies": 1.0,
|
8586 |
+
"rewards/chosen": 0.20646706223487854,
|
8587 |
+
"rewards/margins": 0.5581387281417847,
|
8588 |
+
"rewards/rejected": -0.35167163610458374,
|
8589 |
+
"step": 5710
|
8590 |
}
|
8591 |
],
|
8592 |
"logging_steps": 10,
|