Training in progress, epoch 22, checkpoint
Browse files- last-checkpoint/global_step4533/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step4533/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step4533/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step4533/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step4533/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step4533/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step4533/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step4533/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step4533/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step4533/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step4533/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step4533/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step4533/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step4533/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step4533/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step4533/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/model-00001-of-00002.safetensors +1 -1
- last-checkpoint/model-00002-of-00002.safetensors +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +302 -2
last-checkpoint/global_step4533/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:67fe3bbb691c976edec39fc9edffbb6b2cf820e02c17331efddc8fa4ac7dee36
|
3 |
+
size 2506176112
|
last-checkpoint/global_step4533/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2526a90420c32897527f16232eadadc8198f30539e7b9603db4e3095336a4bac
|
3 |
+
size 2506176112
|
last-checkpoint/global_step4533/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1f996c91286c656d7c1d15b4586136bf69bf32b8fe12fcfae3c6230229b575d0
|
3 |
+
size 2506176112
|
last-checkpoint/global_step4533/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:01a748747c27beba355539bbe7761cb736a916c3ec59fcb7d13bf0c4de8ebf81
|
3 |
+
size 2506176112
|
last-checkpoint/global_step4533/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6412c6d17d8e7a9916cb406210b3c1c66750e850f003a5d060d9cc96a9c416b2
|
3 |
+
size 2506176112
|
last-checkpoint/global_step4533/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fc696da83cf3e9de2ddb484e0bfba72cab42bd7855b4280d903b74516e38a7b3
|
3 |
+
size 2506176112
|
last-checkpoint/global_step4533/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7db4e8b975b1ebb83514b08dddacfb6f12b9b5cac0438d166f59e6d5dc20147c
|
3 |
+
size 2506176112
|
last-checkpoint/global_step4533/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6181d650cdedc38d18bf685928c50089fad739e9605d67fdc07d4333840594d2
|
3 |
+
size 2506176112
|
last-checkpoint/global_step4533/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aad6cf1e90722aa0bded9997f432975f9ac26246c04c133866fd46a50a3a486f
|
3 |
+
size 85570
|
last-checkpoint/global_step4533/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fd4a1345616e6896e2de6e8fcd0ca3739f245412a6f54e02cfeebffb6a198bfb
|
3 |
+
size 85506
|
last-checkpoint/global_step4533/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bdd3016ed6fe1eacc8c1cabc3b7f91cde643ba6e2c010cfbd4178c073605c763
|
3 |
+
size 85506
|
last-checkpoint/global_step4533/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:40298aaeb4130d9e00dbbd161f7627bf1dbcea3137aec363457f42485c0cc4ea
|
3 |
+
size 85506
|
last-checkpoint/global_step4533/zero_pp_rank_4_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4251ba385bb1157c4aaeead83fb9f7731efc285227f30464563868151c914d6f
|
3 |
+
size 85506
|
last-checkpoint/global_step4533/zero_pp_rank_5_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9fadbb60dc81290ffb98ff71ce4da2ba824de319edc1a076297ed87cb8c65003
|
3 |
+
size 85506
|
last-checkpoint/global_step4533/zero_pp_rank_6_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3b8149b7336b4255d3b4ec501fa71066f28d3daedfe26d40e2af38a4353fbbc4
|
3 |
+
size 85506
|
last-checkpoint/global_step4533/zero_pp_rank_7_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c6345bbc9be12cb5ac4a597dbc0740ee29c21bee3e5fd186dc3a3d3abfb4948
|
3 |
+
size 85506
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step4533
|
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4945242264
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5749e6ea106dcd62f8a5403e9ab2697a968659c99e52d97cf37770a6dcf0de89
|
3 |
size 4945242264
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 67121608
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a9913d4ed78dfdf1f7884966d3d5a627d1a427e9dfd802c854f7139effd18d42
|
3 |
size 67121608
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4f43f0f70b0dbcc678819df90ae1498097a1a40b141c1b3634f47415b4753e46
|
3 |
size 15984
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6fea0bf58d3a4bfd4cd6e77b671fd76d0f08457d71ef115ed0b7a79d932ccecb
|
3 |
size 15984
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:485de881ac346a0d6293f173c620168e2c1c78bb56e9d35b719fe68b9599381c
|
3 |
size 15984
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eb3995c042fb6720f0e6292f2cdbc61899166b759822de9794a63f951332b53d
|
3 |
size 15984
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1dc5d0e51cb62121ef22456f6c04b602c2ee63280f1aa9215b5f6ca9ffd945a1
|
3 |
size 15984
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aaf6f7a95413d841f5781a52bd24e6916b171c6b1eae7a534a1e53e4a4d75dda
|
3 |
size 15984
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e8b59e0c70f333ceeb5e4b4cd778af69cd56a780cd649d81f2b1589c1edb774f
|
3 |
size 15984
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab4acc6cc8c266eed2b1e03455904bdc72f90eae6636da6366d4a9d998a1c278
|
3 |
size 15984
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9d43ad6dc2ceca1f9dc036c87b7f18db786223a3aca759dd3644262f6faf9977
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 22.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -6517,6 +6517,306 @@
|
|
6517 |
"rewards/margins": 0.5605840086936951,
|
6518 |
"rewards/rejected": -0.35054340958595276,
|
6519 |
"step": 4330
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6520 |
}
|
6521 |
],
|
6522 |
"logging_steps": 10,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 22.99746835443038,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 4533,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
6517 |
"rewards/margins": 0.5605840086936951,
|
6518 |
"rewards/rejected": -0.35054340958595276,
|
6519 |
"step": 4330
|
6520 |
+
},
|
6521 |
+
{
|
6522 |
+
"epoch": 22.020253164556962,
|
6523 |
+
"grad_norm": 844000.7864919893,
|
6524 |
+
"learning_rate": 2.1560639298025696e-07,
|
6525 |
+
"logits/chosen": -0.2103087455034256,
|
6526 |
+
"logits/rejected": 0.07530391216278076,
|
6527 |
+
"logps/chosen": -30.565990447998047,
|
6528 |
+
"logps/rejected": -547.0203857421875,
|
6529 |
+
"loss": 14383.6906,
|
6530 |
+
"rewards/accuracies": 1.0,
|
6531 |
+
"rewards/chosen": 0.1880597323179245,
|
6532 |
+
"rewards/margins": 0.5152319073677063,
|
6533 |
+
"rewards/rejected": -0.3271721601486206,
|
6534 |
+
"step": 4340
|
6535 |
+
},
|
6536 |
+
{
|
6537 |
+
"epoch": 22.070886075949367,
|
6538 |
+
"grad_norm": 597784.613899612,
|
6539 |
+
"learning_rate": 2.1482293951739267e-07,
|
6540 |
+
"logits/chosen": -0.721124529838562,
|
6541 |
+
"logits/rejected": -0.21510323882102966,
|
6542 |
+
"logps/chosen": -37.94996643066406,
|
6543 |
+
"logps/rejected": -587.461181640625,
|
6544 |
+
"loss": 13822.8656,
|
6545 |
+
"rewards/accuracies": 1.0,
|
6546 |
+
"rewards/chosen": 0.19957685470581055,
|
6547 |
+
"rewards/margins": 0.5496448278427124,
|
6548 |
+
"rewards/rejected": -0.3500679135322571,
|
6549 |
+
"step": 4350
|
6550 |
+
},
|
6551 |
+
{
|
6552 |
+
"epoch": 22.121518987341773,
|
6553 |
+
"grad_norm": 468430.91971069,
|
6554 |
+
"learning_rate": 2.1403948605452835e-07,
|
6555 |
+
"logits/chosen": -1.418505072593689,
|
6556 |
+
"logits/rejected": -0.8604210019111633,
|
6557 |
+
"logps/chosen": -38.54343795776367,
|
6558 |
+
"logps/rejected": -585.6038818359375,
|
6559 |
+
"loss": 13499.3969,
|
6560 |
+
"rewards/accuracies": 0.987500011920929,
|
6561 |
+
"rewards/chosen": 0.2021259367465973,
|
6562 |
+
"rewards/margins": 0.5458530187606812,
|
6563 |
+
"rewards/rejected": -0.34372708201408386,
|
6564 |
+
"step": 4360
|
6565 |
+
},
|
6566 |
+
{
|
6567 |
+
"epoch": 22.172151898734178,
|
6568 |
+
"grad_norm": 838303.6265575557,
|
6569 |
+
"learning_rate": 2.1325603259166405e-07,
|
6570 |
+
"logits/chosen": -0.013787698931992054,
|
6571 |
+
"logits/rejected": -0.22224357724189758,
|
6572 |
+
"logps/chosen": -33.32988357543945,
|
6573 |
+
"logps/rejected": -576.55224609375,
|
6574 |
+
"loss": 13816.5812,
|
6575 |
+
"rewards/accuracies": 1.0,
|
6576 |
+
"rewards/chosen": 0.1937212496995926,
|
6577 |
+
"rewards/margins": 0.5373150110244751,
|
6578 |
+
"rewards/rejected": -0.3435937762260437,
|
6579 |
+
"step": 4370
|
6580 |
+
},
|
6581 |
+
{
|
6582 |
+
"epoch": 22.222784810126583,
|
6583 |
+
"grad_norm": 524213.07765733794,
|
6584 |
+
"learning_rate": 2.1247257912879973e-07,
|
6585 |
+
"logits/chosen": 0.3687540888786316,
|
6586 |
+
"logits/rejected": 0.8078397512435913,
|
6587 |
+
"logps/chosen": -38.822872161865234,
|
6588 |
+
"logps/rejected": -553.8123168945312,
|
6589 |
+
"loss": 12435.0875,
|
6590 |
+
"rewards/accuracies": 0.9750000238418579,
|
6591 |
+
"rewards/chosen": 0.18458959460258484,
|
6592 |
+
"rewards/margins": 0.5130779147148132,
|
6593 |
+
"rewards/rejected": -0.32848840951919556,
|
6594 |
+
"step": 4380
|
6595 |
+
},
|
6596 |
+
{
|
6597 |
+
"epoch": 22.27341772151899,
|
6598 |
+
"grad_norm": 476932.283051178,
|
6599 |
+
"learning_rate": 2.1168912566593544e-07,
|
6600 |
+
"logits/chosen": 0.6524232029914856,
|
6601 |
+
"logits/rejected": 0.6763177514076233,
|
6602 |
+
"logps/chosen": -41.4456901550293,
|
6603 |
+
"logps/rejected": -586.055419921875,
|
6604 |
+
"loss": 14132.7062,
|
6605 |
+
"rewards/accuracies": 0.9750000238418579,
|
6606 |
+
"rewards/chosen": 0.20029637217521667,
|
6607 |
+
"rewards/margins": 0.5451359152793884,
|
6608 |
+
"rewards/rejected": -0.34483957290649414,
|
6609 |
+
"step": 4390
|
6610 |
+
},
|
6611 |
+
{
|
6612 |
+
"epoch": 22.324050632911394,
|
6613 |
+
"grad_norm": 568972.1382617814,
|
6614 |
+
"learning_rate": 2.1090567220307112e-07,
|
6615 |
+
"logits/chosen": -0.3675435781478882,
|
6616 |
+
"logits/rejected": 0.2508888840675354,
|
6617 |
+
"logps/chosen": -37.127281188964844,
|
6618 |
+
"logps/rejected": -571.7310180664062,
|
6619 |
+
"loss": 13226.8641,
|
6620 |
+
"rewards/accuracies": 0.987500011920929,
|
6621 |
+
"rewards/chosen": 0.19992589950561523,
|
6622 |
+
"rewards/margins": 0.5378109812736511,
|
6623 |
+
"rewards/rejected": -0.33788514137268066,
|
6624 |
+
"step": 4400
|
6625 |
+
},
|
6626 |
+
{
|
6627 |
+
"epoch": 22.374683544303796,
|
6628 |
+
"grad_norm": 549953.3378298564,
|
6629 |
+
"learning_rate": 2.1012221874020682e-07,
|
6630 |
+
"logits/chosen": -0.3316110372543335,
|
6631 |
+
"logits/rejected": 0.12318412959575653,
|
6632 |
+
"logps/chosen": -45.176429748535156,
|
6633 |
+
"logps/rejected": -601.1099243164062,
|
6634 |
+
"loss": 13357.3594,
|
6635 |
+
"rewards/accuracies": 1.0,
|
6636 |
+
"rewards/chosen": 0.2056044340133667,
|
6637 |
+
"rewards/margins": 0.5584502220153809,
|
6638 |
+
"rewards/rejected": -0.35284581780433655,
|
6639 |
+
"step": 4410
|
6640 |
+
},
|
6641 |
+
{
|
6642 |
+
"epoch": 22.4253164556962,
|
6643 |
+
"grad_norm": 487398.89046152594,
|
6644 |
+
"learning_rate": 2.093387652773425e-07,
|
6645 |
+
"logits/chosen": -1.0198824405670166,
|
6646 |
+
"logits/rejected": -0.21292218565940857,
|
6647 |
+
"logps/chosen": -36.835960388183594,
|
6648 |
+
"logps/rejected": -577.4632568359375,
|
6649 |
+
"loss": 13915.3031,
|
6650 |
+
"rewards/accuracies": 1.0,
|
6651 |
+
"rewards/chosen": 0.20356829464435577,
|
6652 |
+
"rewards/margins": 0.543707013130188,
|
6653 |
+
"rewards/rejected": -0.3401387631893158,
|
6654 |
+
"step": 4420
|
6655 |
+
},
|
6656 |
+
{
|
6657 |
+
"epoch": 22.475949367088607,
|
6658 |
+
"grad_norm": 477361.2573301333,
|
6659 |
+
"learning_rate": 2.085553118144782e-07,
|
6660 |
+
"logits/chosen": 0.3704206943511963,
|
6661 |
+
"logits/rejected": 0.693733811378479,
|
6662 |
+
"logps/chosen": -46.64609146118164,
|
6663 |
+
"logps/rejected": -594.5731811523438,
|
6664 |
+
"loss": 13106.9359,
|
6665 |
+
"rewards/accuracies": 0.987500011920929,
|
6666 |
+
"rewards/chosen": 0.20661070942878723,
|
6667 |
+
"rewards/margins": 0.5531338453292847,
|
6668 |
+
"rewards/rejected": -0.34652310609817505,
|
6669 |
+
"step": 4430
|
6670 |
+
},
|
6671 |
+
{
|
6672 |
+
"epoch": 22.526582278481012,
|
6673 |
+
"grad_norm": 597606.9724370906,
|
6674 |
+
"learning_rate": 2.077718583516139e-07,
|
6675 |
+
"logits/chosen": -0.6012102365493774,
|
6676 |
+
"logits/rejected": -0.6212292909622192,
|
6677 |
+
"logps/chosen": -36.24720001220703,
|
6678 |
+
"logps/rejected": -570.0081787109375,
|
6679 |
+
"loss": 13390.8625,
|
6680 |
+
"rewards/accuracies": 1.0,
|
6681 |
+
"rewards/chosen": 0.20318233966827393,
|
6682 |
+
"rewards/margins": 0.5344886779785156,
|
6683 |
+
"rewards/rejected": -0.3313063085079193,
|
6684 |
+
"step": 4440
|
6685 |
+
},
|
6686 |
+
{
|
6687 |
+
"epoch": 22.577215189873417,
|
6688 |
+
"grad_norm": 469529.248927815,
|
6689 |
+
"learning_rate": 2.069884048887496e-07,
|
6690 |
+
"logits/chosen": -0.041382573544979095,
|
6691 |
+
"logits/rejected": 0.7878470420837402,
|
6692 |
+
"logps/chosen": -43.38654708862305,
|
6693 |
+
"logps/rejected": -568.7279052734375,
|
6694 |
+
"loss": 13333.4188,
|
6695 |
+
"rewards/accuracies": 1.0,
|
6696 |
+
"rewards/chosen": 0.19029700756072998,
|
6697 |
+
"rewards/margins": 0.5311275124549866,
|
6698 |
+
"rewards/rejected": -0.3408304750919342,
|
6699 |
+
"step": 4450
|
6700 |
+
},
|
6701 |
+
{
|
6702 |
+
"epoch": 22.627848101265823,
|
6703 |
+
"grad_norm": 402623.00766789017,
|
6704 |
+
"learning_rate": 2.0620495142588527e-07,
|
6705 |
+
"logits/chosen": -0.8500850796699524,
|
6706 |
+
"logits/rejected": 0.10065221786499023,
|
6707 |
+
"logps/chosen": -31.52435302734375,
|
6708 |
+
"logps/rejected": -562.478271484375,
|
6709 |
+
"loss": 13787.8797,
|
6710 |
+
"rewards/accuracies": 0.987500011920929,
|
6711 |
+
"rewards/chosen": 0.1997881382703781,
|
6712 |
+
"rewards/margins": 0.5369755029678345,
|
6713 |
+
"rewards/rejected": -0.33718740940093994,
|
6714 |
+
"step": 4460
|
6715 |
+
},
|
6716 |
+
{
|
6717 |
+
"epoch": 22.678481012658228,
|
6718 |
+
"grad_norm": 373755.1797101064,
|
6719 |
+
"learning_rate": 2.05421497963021e-07,
|
6720 |
+
"logits/chosen": -1.3291960954666138,
|
6721 |
+
"logits/rejected": -1.2023630142211914,
|
6722 |
+
"logps/chosen": -34.12505340576172,
|
6723 |
+
"logps/rejected": -600.3700561523438,
|
6724 |
+
"loss": 13297.9406,
|
6725 |
+
"rewards/accuracies": 1.0,
|
6726 |
+
"rewards/chosen": 0.20627860724925995,
|
6727 |
+
"rewards/margins": 0.5676389336585999,
|
6728 |
+
"rewards/rejected": -0.3613602817058563,
|
6729 |
+
"step": 4470
|
6730 |
+
},
|
6731 |
+
{
|
6732 |
+
"epoch": 22.729113924050633,
|
6733 |
+
"grad_norm": 402761.92027776636,
|
6734 |
+
"learning_rate": 2.0463804450015669e-07,
|
6735 |
+
"logits/chosen": -1.5893421173095703,
|
6736 |
+
"logits/rejected": -1.3823096752166748,
|
6737 |
+
"logps/chosen": -30.61318588256836,
|
6738 |
+
"logps/rejected": -584.5267944335938,
|
6739 |
+
"loss": 14326.1875,
|
6740 |
+
"rewards/accuracies": 0.987500011920929,
|
6741 |
+
"rewards/chosen": 0.20079275965690613,
|
6742 |
+
"rewards/margins": 0.5540488958358765,
|
6743 |
+
"rewards/rejected": -0.35325610637664795,
|
6744 |
+
"step": 4480
|
6745 |
+
},
|
6746 |
+
{
|
6747 |
+
"epoch": 22.77974683544304,
|
6748 |
+
"grad_norm": 547067.5872175789,
|
6749 |
+
"learning_rate": 2.038545910372924e-07,
|
6750 |
+
"logits/chosen": 0.4800703525543213,
|
6751 |
+
"logits/rejected": 1.4792516231536865,
|
6752 |
+
"logps/chosen": -27.049495697021484,
|
6753 |
+
"logps/rejected": -563.0673217773438,
|
6754 |
+
"loss": 14447.3891,
|
6755 |
+
"rewards/accuracies": 1.0,
|
6756 |
+
"rewards/chosen": 0.19703736901283264,
|
6757 |
+
"rewards/margins": 0.5409034490585327,
|
6758 |
+
"rewards/rejected": -0.3438660502433777,
|
6759 |
+
"step": 4490
|
6760 |
+
},
|
6761 |
+
{
|
6762 |
+
"epoch": 22.830379746835444,
|
6763 |
+
"grad_norm": 672757.480231201,
|
6764 |
+
"learning_rate": 2.0307113757442807e-07,
|
6765 |
+
"logits/chosen": 0.13832028210163116,
|
6766 |
+
"logits/rejected": 0.6534411907196045,
|
6767 |
+
"logps/chosen": -48.682350158691406,
|
6768 |
+
"logps/rejected": -608.1444091796875,
|
6769 |
+
"loss": 13146.0031,
|
6770 |
+
"rewards/accuracies": 1.0,
|
6771 |
+
"rewards/chosen": 0.20029571652412415,
|
6772 |
+
"rewards/margins": 0.5563368797302246,
|
6773 |
+
"rewards/rejected": -0.35604116320610046,
|
6774 |
+
"step": 4500
|
6775 |
+
},
|
6776 |
+
{
|
6777 |
+
"epoch": 22.88101265822785,
|
6778 |
+
"grad_norm": 369986.02432868385,
|
6779 |
+
"learning_rate": 2.0228768411156378e-07,
|
6780 |
+
"logits/chosen": -1.8307338953018188,
|
6781 |
+
"logits/rejected": -1.2095929384231567,
|
6782 |
+
"logps/chosen": -45.19769287109375,
|
6783 |
+
"logps/rejected": -578.0750122070312,
|
6784 |
+
"loss": 14329.1656,
|
6785 |
+
"rewards/accuracies": 1.0,
|
6786 |
+
"rewards/chosen": 0.1950627863407135,
|
6787 |
+
"rewards/margins": 0.5377144813537598,
|
6788 |
+
"rewards/rejected": -0.3426516652107239,
|
6789 |
+
"step": 4510
|
6790 |
+
},
|
6791 |
+
{
|
6792 |
+
"epoch": 22.931645569620255,
|
6793 |
+
"grad_norm": 699107.7543808775,
|
6794 |
+
"learning_rate": 2.0150423064869946e-07,
|
6795 |
+
"logits/chosen": -0.08685462176799774,
|
6796 |
+
"logits/rejected": 0.9019424319267273,
|
6797 |
+
"logps/chosen": -45.735721588134766,
|
6798 |
+
"logps/rejected": -577.2432861328125,
|
6799 |
+
"loss": 13698.3734,
|
6800 |
+
"rewards/accuracies": 1.0,
|
6801 |
+
"rewards/chosen": 0.19573049247264862,
|
6802 |
+
"rewards/margins": 0.5354525446891785,
|
6803 |
+
"rewards/rejected": -0.33972200751304626,
|
6804 |
+
"step": 4520
|
6805 |
+
},
|
6806 |
+
{
|
6807 |
+
"epoch": 22.98227848101266,
|
6808 |
+
"grad_norm": 406293.788277243,
|
6809 |
+
"learning_rate": 2.0072077718583516e-07,
|
6810 |
+
"logits/chosen": -0.8599483370780945,
|
6811 |
+
"logits/rejected": 0.11351003497838974,
|
6812 |
+
"logps/chosen": -26.267419815063477,
|
6813 |
+
"logps/rejected": -555.6368408203125,
|
6814 |
+
"loss": 13659.9219,
|
6815 |
+
"rewards/accuracies": 0.987500011920929,
|
6816 |
+
"rewards/chosen": 0.19417151808738708,
|
6817 |
+
"rewards/margins": 0.5340765714645386,
|
6818 |
+
"rewards/rejected": -0.3399050235748291,
|
6819 |
+
"step": 4530
|
6820 |
}
|
6821 |
],
|
6822 |
"logging_steps": 10,
|