Training in progress, epoch 24, checkpoint
Browse files- last-checkpoint/global_step4928/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step4928/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step4928/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step4928/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step4928/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step4928/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step4928/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step4928/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step4928/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step4928/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step4928/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step4928/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step4928/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step4928/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step4928/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step4928/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/model-00001-of-00002.safetensors +1 -1
- last-checkpoint/model-00002-of-00002.safetensors +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +287 -2
last-checkpoint/global_step4928/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8a57a9a23ef7f096514a7c23cd3b6dd9320c85781d700c34b4b22f2c8df92a75
|
3 |
+
size 2506176112
|
last-checkpoint/global_step4928/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:021f57078c9044b2309021b4fc20bdc862c548034ebebb7a1e2c77c8eaae5d09
|
3 |
+
size 2506176112
|
last-checkpoint/global_step4928/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:19eaeba11c0ceb2532ddfd732a74b89769b0567f5c2480d566b19b72c9fd6a80
|
3 |
+
size 2506176112
|
last-checkpoint/global_step4928/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ee5c93da09fbfeb06fc19b3cef49ab91db0e725035444d6cdb96809b8b9eadcb
|
3 |
+
size 2506176112
|
last-checkpoint/global_step4928/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c00ffef8eefa8f8358ed1c6f61d654b20c27eddbfe0dd82830a9f20e16757f85
|
3 |
+
size 2506176112
|
last-checkpoint/global_step4928/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c10d54de0cd548e613485ee0e69da7d813ee80b2317569160958e2e4574aa72b
|
3 |
+
size 2506176112
|
last-checkpoint/global_step4928/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:626e1b55e0f1d6f3ce34dcade96d7fba40f0099032be2cd622d3062d9fca2e0e
|
3 |
+
size 2506176112
|
last-checkpoint/global_step4928/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eb896c66c8c21d39353a007d4c2ac348fc8f8b10134c68b5313b9e83069fd695
|
3 |
+
size 2506176112
|
last-checkpoint/global_step4928/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:84194c554e1ba6e218f3d9df508f289e525895d9d44dbdbafac31ef0e28aa543
|
3 |
+
size 85570
|
last-checkpoint/global_step4928/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca9ecac1033adac02f1e567dee0188e38f517d7c2675ace09591182ee269512c
|
3 |
+
size 85506
|
last-checkpoint/global_step4928/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:77c04da8f7351dc76a8b4e781d08009e5e26ec0026e8092fb4066e4ee4c0c017
|
3 |
+
size 85506
|
last-checkpoint/global_step4928/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:52caac51b5ab59ce3fdf3092538f84272c220a80081cc21f22f230699d5152d1
|
3 |
+
size 85506
|
last-checkpoint/global_step4928/zero_pp_rank_4_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:15beab398fd58f2a629c4c60d768da129efbcfc290c667ba33bdf6fd1245d530
|
3 |
+
size 85506
|
last-checkpoint/global_step4928/zero_pp_rank_5_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cc28cb444e8b65d936deba8cb0b52f072349842d1308d11e82c4aed0769e1934
|
3 |
+
size 85506
|
last-checkpoint/global_step4928/zero_pp_rank_6_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:350b51963f22fad0c545de65665496d3aeea7062c1ab553cc246600f06963c6a
|
3 |
+
size 85506
|
last-checkpoint/global_step4928/zero_pp_rank_7_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:76b41ed59377277c212831b6ecb28e620914d57efeefc205f64c9c84a7c32c26
|
3 |
+
size 85506
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step4928
|
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4945242264
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b9da2b7dcfa00a88a71ba39f69087d6c106b9fc3502f080f4fdf0c95ba7f0232
|
3 |
size 4945242264
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 67121608
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:66272c39e510a03fae4a8b8c2051165d69a38abcede58508cbcdf95984247125
|
3 |
size 67121608
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6118faeaee4fdfaa13ac70fa72cfa116964f5a3b96fac7723d08df58a38b397c
|
3 |
size 15984
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4dfa684751cc5b0d0c6c20837671c3f615d1dea15a14f377cd3f328d98685be6
|
3 |
size 15984
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f6288d8200e463b5ae8ad06a62ffa8cc379f48c86fd01515a13181f976efa71a
|
3 |
size 15984
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bd3d0484a9201a8efbd51326113ef6b44a0b272cfa40529d39c56a9d67deac8e
|
3 |
size 15984
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:426c6fc3fcaae6fad4fdb83d268d91ed94e3e0e88bed483e185a78e86a9692b3
|
3 |
size 15984
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7d089430782ecc3b7446f527601b569a4990fdd9f8d96c18c87dc8d503cbdb70
|
3 |
size 15984
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b61f73069ccc96351c7253c2334d20c170e92750457b685c9d79286d288292e9
|
3 |
size 15984
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e70ddcc8f9ba6c0be1ac8d1ba2f06e8a4253e0a843f3a263a85c073416ca948d
|
3 |
size 15984
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cc0dea9c7426022052c6904d669ed6537aacbe6f69c00de710577f64daa74c6b
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 24.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -7117,6 +7117,291 @@
|
|
7117 |
"rewards/margins": 0.5577437877655029,
|
7118 |
"rewards/rejected": -0.3512403070926666,
|
7119 |
"step": 4730
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7120 |
}
|
7121 |
],
|
7122 |
"logging_steps": 10,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 24.99746835443038,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 4928,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
7117 |
"rewards/margins": 0.5577437877655029,
|
7118 |
"rewards/rejected": -0.3512403070926666,
|
7119 |
"step": 4730
|
7120 |
+
},
|
7121 |
+
{
|
7122 |
+
"epoch": 24.045569620253165,
|
7123 |
+
"grad_norm": 627241.9523500776,
|
7124 |
+
"learning_rate": 1.8426825446568473e-07,
|
7125 |
+
"logits/chosen": -0.28766584396362305,
|
7126 |
+
"logits/rejected": -0.6269916296005249,
|
7127 |
+
"logps/chosen": -44.65516662597656,
|
7128 |
+
"logps/rejected": -577.6954345703125,
|
7129 |
+
"loss": 12950.232,
|
7130 |
+
"rewards/accuracies": 1.0,
|
7131 |
+
"rewards/chosen": 0.20441746711730957,
|
7132 |
+
"rewards/margins": 0.5374675393104553,
|
7133 |
+
"rewards/rejected": -0.33305004239082336,
|
7134 |
+
"step": 4740
|
7135 |
+
},
|
7136 |
+
{
|
7137 |
+
"epoch": 24.09620253164557,
|
7138 |
+
"grad_norm": 347781.48168387014,
|
7139 |
+
"learning_rate": 1.8348480100282043e-07,
|
7140 |
+
"logits/chosen": -0.7881828546524048,
|
7141 |
+
"logits/rejected": 0.06337795406579971,
|
7142 |
+
"logps/chosen": -27.82383155822754,
|
7143 |
+
"logps/rejected": -579.8468017578125,
|
7144 |
+
"loss": 12986.6266,
|
7145 |
+
"rewards/accuracies": 1.0,
|
7146 |
+
"rewards/chosen": 0.2079242467880249,
|
7147 |
+
"rewards/margins": 0.5530039668083191,
|
7148 |
+
"rewards/rejected": -0.3450797498226166,
|
7149 |
+
"step": 4750
|
7150 |
+
},
|
7151 |
+
{
|
7152 |
+
"epoch": 24.146835443037975,
|
7153 |
+
"grad_norm": 263636.0742414822,
|
7154 |
+
"learning_rate": 1.827013475399561e-07,
|
7155 |
+
"logits/chosen": -1.2877452373504639,
|
7156 |
+
"logits/rejected": -0.24622194468975067,
|
7157 |
+
"logps/chosen": -30.940113067626953,
|
7158 |
+
"logps/rejected": -566.517333984375,
|
7159 |
+
"loss": 12716.3281,
|
7160 |
+
"rewards/accuracies": 0.987500011920929,
|
7161 |
+
"rewards/chosen": 0.20519797503948212,
|
7162 |
+
"rewards/margins": 0.5418139696121216,
|
7163 |
+
"rewards/rejected": -0.33661606907844543,
|
7164 |
+
"step": 4760
|
7165 |
+
},
|
7166 |
+
{
|
7167 |
+
"epoch": 24.19746835443038,
|
7168 |
+
"grad_norm": 343217.0970891512,
|
7169 |
+
"learning_rate": 1.8191789407709182e-07,
|
7170 |
+
"logits/chosen": -1.0294177532196045,
|
7171 |
+
"logits/rejected": -0.4815802574157715,
|
7172 |
+
"logps/chosen": -31.3253231048584,
|
7173 |
+
"logps/rejected": -560.1129150390625,
|
7174 |
+
"loss": 13408.0094,
|
7175 |
+
"rewards/accuracies": 0.9750000238418579,
|
7176 |
+
"rewards/chosen": 0.19244466722011566,
|
7177 |
+
"rewards/margins": 0.5293484926223755,
|
7178 |
+
"rewards/rejected": -0.3369038701057434,
|
7179 |
+
"step": 4770
|
7180 |
+
},
|
7181 |
+
{
|
7182 |
+
"epoch": 24.248101265822786,
|
7183 |
+
"grad_norm": 504214.9301429895,
|
7184 |
+
"learning_rate": 1.811344406142275e-07,
|
7185 |
+
"logits/chosen": -0.604789137840271,
|
7186 |
+
"logits/rejected": 0.22590136528015137,
|
7187 |
+
"logps/chosen": -41.899864196777344,
|
7188 |
+
"logps/rejected": -555.2015380859375,
|
7189 |
+
"loss": 13531.9625,
|
7190 |
+
"rewards/accuracies": 0.9750000238418579,
|
7191 |
+
"rewards/chosen": 0.190764918923378,
|
7192 |
+
"rewards/margins": 0.5197011232376099,
|
7193 |
+
"rewards/rejected": -0.32893624901771545,
|
7194 |
+
"step": 4780
|
7195 |
+
},
|
7196 |
+
{
|
7197 |
+
"epoch": 24.29873417721519,
|
7198 |
+
"grad_norm": 376293.61129873747,
|
7199 |
+
"learning_rate": 1.803509871513632e-07,
|
7200 |
+
"logits/chosen": -0.3547658324241638,
|
7201 |
+
"logits/rejected": -0.13969659805297852,
|
7202 |
+
"logps/chosen": -28.85129737854004,
|
7203 |
+
"logps/rejected": -577.5880126953125,
|
7204 |
+
"loss": 12898.8125,
|
7205 |
+
"rewards/accuracies": 1.0,
|
7206 |
+
"rewards/chosen": 0.19991345703601837,
|
7207 |
+
"rewards/margins": 0.5475735664367676,
|
7208 |
+
"rewards/rejected": -0.3476601243019104,
|
7209 |
+
"step": 4790
|
7210 |
+
},
|
7211 |
+
{
|
7212 |
+
"epoch": 24.349367088607593,
|
7213 |
+
"grad_norm": 414135.66871189536,
|
7214 |
+
"learning_rate": 1.7956753368849888e-07,
|
7215 |
+
"logits/chosen": 0.09798486530780792,
|
7216 |
+
"logits/rejected": 0.8055311441421509,
|
7217 |
+
"logps/chosen": -34.26675796508789,
|
7218 |
+
"logps/rejected": -584.9435424804688,
|
7219 |
+
"loss": 12507.7328,
|
7220 |
+
"rewards/accuracies": 1.0,
|
7221 |
+
"rewards/chosen": 0.20852184295654297,
|
7222 |
+
"rewards/margins": 0.5590152144432068,
|
7223 |
+
"rewards/rejected": -0.3504934012889862,
|
7224 |
+
"step": 4800
|
7225 |
+
},
|
7226 |
+
{
|
7227 |
+
"epoch": 24.4,
|
7228 |
+
"grad_norm": 343912.9213203651,
|
7229 |
+
"learning_rate": 1.787840802256346e-07,
|
7230 |
+
"logits/chosen": -0.12874791026115417,
|
7231 |
+
"logits/rejected": 0.15529172122478485,
|
7232 |
+
"logps/chosen": -32.1009635925293,
|
7233 |
+
"logps/rejected": -574.7623291015625,
|
7234 |
+
"loss": 13372.5594,
|
7235 |
+
"rewards/accuracies": 0.987500011920929,
|
7236 |
+
"rewards/chosen": 0.1985681653022766,
|
7237 |
+
"rewards/margins": 0.546955943107605,
|
7238 |
+
"rewards/rejected": -0.34838777780532837,
|
7239 |
+
"step": 4810
|
7240 |
+
},
|
7241 |
+
{
|
7242 |
+
"epoch": 24.450632911392404,
|
7243 |
+
"grad_norm": 697134.5088322331,
|
7244 |
+
"learning_rate": 1.7800062676277027e-07,
|
7245 |
+
"logits/chosen": 1.3246450424194336,
|
7246 |
+
"logits/rejected": 1.760595679283142,
|
7247 |
+
"logps/chosen": -32.29949188232422,
|
7248 |
+
"logps/rejected": -575.516357421875,
|
7249 |
+
"loss": 12415.9672,
|
7250 |
+
"rewards/accuracies": 0.987500011920929,
|
7251 |
+
"rewards/chosen": 0.19931714236736298,
|
7252 |
+
"rewards/margins": 0.5466721057891846,
|
7253 |
+
"rewards/rejected": -0.3473549485206604,
|
7254 |
+
"step": 4820
|
7255 |
+
},
|
7256 |
+
{
|
7257 |
+
"epoch": 24.50126582278481,
|
7258 |
+
"grad_norm": 389676.90431780973,
|
7259 |
+
"learning_rate": 1.7721717329990597e-07,
|
7260 |
+
"logits/chosen": -1.3381322622299194,
|
7261 |
+
"logits/rejected": -0.6404735445976257,
|
7262 |
+
"logps/chosen": -34.635719299316406,
|
7263 |
+
"logps/rejected": -587.3080444335938,
|
7264 |
+
"loss": 13101.6227,
|
7265 |
+
"rewards/accuracies": 1.0,
|
7266 |
+
"rewards/chosen": 0.20362886786460876,
|
7267 |
+
"rewards/margins": 0.5546956658363342,
|
7268 |
+
"rewards/rejected": -0.3510667383670807,
|
7269 |
+
"step": 4830
|
7270 |
+
},
|
7271 |
+
{
|
7272 |
+
"epoch": 24.551898734177215,
|
7273 |
+
"grad_norm": 314600.5637340995,
|
7274 |
+
"learning_rate": 1.7643371983704165e-07,
|
7275 |
+
"logits/chosen": 0.9953921437263489,
|
7276 |
+
"logits/rejected": 0.9643779993057251,
|
7277 |
+
"logps/chosen": -30.646331787109375,
|
7278 |
+
"logps/rejected": -570.855712890625,
|
7279 |
+
"loss": 12974.9633,
|
7280 |
+
"rewards/accuracies": 0.9750000238418579,
|
7281 |
+
"rewards/chosen": 0.19645583629608154,
|
7282 |
+
"rewards/margins": 0.5343093872070312,
|
7283 |
+
"rewards/rejected": -0.3378535211086273,
|
7284 |
+
"step": 4840
|
7285 |
+
},
|
7286 |
+
{
|
7287 |
+
"epoch": 24.60253164556962,
|
7288 |
+
"grad_norm": 327013.6839426029,
|
7289 |
+
"learning_rate": 1.7565026637417739e-07,
|
7290 |
+
"logits/chosen": -0.7217426300048828,
|
7291 |
+
"logits/rejected": -0.7290517091751099,
|
7292 |
+
"logps/chosen": -37.666648864746094,
|
7293 |
+
"logps/rejected": -563.5865478515625,
|
7294 |
+
"loss": 13273.0266,
|
7295 |
+
"rewards/accuracies": 0.9624999761581421,
|
7296 |
+
"rewards/chosen": 0.197954460978508,
|
7297 |
+
"rewards/margins": 0.5282526612281799,
|
7298 |
+
"rewards/rejected": -0.33029812574386597,
|
7299 |
+
"step": 4850
|
7300 |
+
},
|
7301 |
+
{
|
7302 |
+
"epoch": 24.653164556962025,
|
7303 |
+
"grad_norm": 425662.97201424866,
|
7304 |
+
"learning_rate": 1.7486681291131307e-07,
|
7305 |
+
"logits/chosen": -0.32900291681289673,
|
7306 |
+
"logits/rejected": 0.18864622712135315,
|
7307 |
+
"logps/chosen": -32.36582565307617,
|
7308 |
+
"logps/rejected": -566.5547485351562,
|
7309 |
+
"loss": 13350.5594,
|
7310 |
+
"rewards/accuracies": 0.9750000238418579,
|
7311 |
+
"rewards/chosen": 0.19610336422920227,
|
7312 |
+
"rewards/margins": 0.5321984887123108,
|
7313 |
+
"rewards/rejected": -0.3360951244831085,
|
7314 |
+
"step": 4860
|
7315 |
+
},
|
7316 |
+
{
|
7317 |
+
"epoch": 24.70379746835443,
|
7318 |
+
"grad_norm": 402351.7657170625,
|
7319 |
+
"learning_rate": 1.7408335944844877e-07,
|
7320 |
+
"logits/chosen": -2.112635850906372,
|
7321 |
+
"logits/rejected": -1.4337832927703857,
|
7322 |
+
"logps/chosen": -36.27765655517578,
|
7323 |
+
"logps/rejected": -588.708740234375,
|
7324 |
+
"loss": 13690.0375,
|
7325 |
+
"rewards/accuracies": 1.0,
|
7326 |
+
"rewards/chosen": 0.20721419155597687,
|
7327 |
+
"rewards/margins": 0.5532296299934387,
|
7328 |
+
"rewards/rejected": -0.34601545333862305,
|
7329 |
+
"step": 4870
|
7330 |
+
},
|
7331 |
+
{
|
7332 |
+
"epoch": 24.754430379746836,
|
7333 |
+
"grad_norm": 357871.7969812476,
|
7334 |
+
"learning_rate": 1.7329990598558445e-07,
|
7335 |
+
"logits/chosen": -0.34599849581718445,
|
7336 |
+
"logits/rejected": 0.0005200624582357705,
|
7337 |
+
"logps/chosen": -28.50592041015625,
|
7338 |
+
"logps/rejected": -560.6458740234375,
|
7339 |
+
"loss": 13133.4,
|
7340 |
+
"rewards/accuracies": 0.987500011920929,
|
7341 |
+
"rewards/chosen": 0.1971847414970398,
|
7342 |
+
"rewards/margins": 0.5326961278915405,
|
7343 |
+
"rewards/rejected": -0.33551135659217834,
|
7344 |
+
"step": 4880
|
7345 |
+
},
|
7346 |
+
{
|
7347 |
+
"epoch": 24.80506329113924,
|
7348 |
+
"grad_norm": 610016.4055058825,
|
7349 |
+
"learning_rate": 1.7251645252272016e-07,
|
7350 |
+
"logits/chosen": -1.225339651107788,
|
7351 |
+
"logits/rejected": -0.8243592977523804,
|
7352 |
+
"logps/chosen": -34.17732620239258,
|
7353 |
+
"logps/rejected": -581.1436767578125,
|
7354 |
+
"loss": 12571.1,
|
7355 |
+
"rewards/accuracies": 1.0,
|
7356 |
+
"rewards/chosen": 0.20419716835021973,
|
7357 |
+
"rewards/margins": 0.5520066022872925,
|
7358 |
+
"rewards/rejected": -0.347809374332428,
|
7359 |
+
"step": 4890
|
7360 |
+
},
|
7361 |
+
{
|
7362 |
+
"epoch": 24.855696202531647,
|
7363 |
+
"grad_norm": 379732.159808034,
|
7364 |
+
"learning_rate": 1.7173299905985584e-07,
|
7365 |
+
"logits/chosen": -1.817439317703247,
|
7366 |
+
"logits/rejected": -1.4895861148834229,
|
7367 |
+
"logps/chosen": -30.775598526000977,
|
7368 |
+
"logps/rejected": -570.484130859375,
|
7369 |
+
"loss": 13212.3781,
|
7370 |
+
"rewards/accuracies": 0.9750000238418579,
|
7371 |
+
"rewards/chosen": 0.20220620930194855,
|
7372 |
+
"rewards/margins": 0.5392990112304688,
|
7373 |
+
"rewards/rejected": -0.3370928466320038,
|
7374 |
+
"step": 4900
|
7375 |
+
},
|
7376 |
+
{
|
7377 |
+
"epoch": 24.906329113924052,
|
7378 |
+
"grad_norm": 324902.93726753094,
|
7379 |
+
"learning_rate": 1.7094954559699154e-07,
|
7380 |
+
"logits/chosen": -0.6947388648986816,
|
7381 |
+
"logits/rejected": -0.4560522437095642,
|
7382 |
+
"logps/chosen": -40.327796936035156,
|
7383 |
+
"logps/rejected": -581.7532348632812,
|
7384 |
+
"loss": 12891.9016,
|
7385 |
+
"rewards/accuracies": 0.987500011920929,
|
7386 |
+
"rewards/chosen": 0.20357458293437958,
|
7387 |
+
"rewards/margins": 0.5383836030960083,
|
7388 |
+
"rewards/rejected": -0.33480900526046753,
|
7389 |
+
"step": 4910
|
7390 |
+
},
|
7391 |
+
{
|
7392 |
+
"epoch": 24.956962025316457,
|
7393 |
+
"grad_norm": 294429.7500390618,
|
7394 |
+
"learning_rate": 1.7016609213412722e-07,
|
7395 |
+
"logits/chosen": -0.9121583104133606,
|
7396 |
+
"logits/rejected": 0.40684938430786133,
|
7397 |
+
"logps/chosen": -28.22664451599121,
|
7398 |
+
"logps/rejected": -580.9795532226562,
|
7399 |
+
"loss": 13184.5812,
|
7400 |
+
"rewards/accuracies": 1.0,
|
7401 |
+
"rewards/chosen": 0.20078356564044952,
|
7402 |
+
"rewards/margins": 0.5547462701797485,
|
7403 |
+
"rewards/rejected": -0.3539626896381378,
|
7404 |
+
"step": 4920
|
7405 |
}
|
7406 |
],
|
7407 |
"logging_steps": 10,
|