cat-searcher
commited on
Commit
•
b3e3fde
1
Parent(s):
689f398
Training in progress, epoch 32, checkpoint
Browse files- last-checkpoint/global_step6311/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step6311/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step6311/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step6311/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step6311/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step6311/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step6311/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step6311/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step6311/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step6311/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step6311/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step6311/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step6311/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step6311/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step6311/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step6311/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/model-00001-of-00002.safetensors +1 -1
- last-checkpoint/model-00002-of-00002.safetensors +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +302 -2
last-checkpoint/global_step6311/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b1638964301fb436493919c9fca6ca0b24757bffcb3ab3631c4318b6d2c5c8da
|
3 |
+
size 2506176112
|
last-checkpoint/global_step6311/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b5e67296286fe1995c5e46e13fe58ea336b5760742373cd74308f087993422e9
|
3 |
+
size 2506176112
|
last-checkpoint/global_step6311/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3533e6d8ea544fc4ac3fea983acae782fc11c435392c67fd45efbba66afe7076
|
3 |
+
size 2506176112
|
last-checkpoint/global_step6311/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4c3852775e4256033ec3bbb1ed5e12a1ab01d36b2e7c7dfa237a1096da618a7c
|
3 |
+
size 2506176112
|
last-checkpoint/global_step6311/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f72ffbe1117371c6d0b90a927c2bd5e0ca456a7c586cc30dfe0092af8d9ecb16
|
3 |
+
size 2506176112
|
last-checkpoint/global_step6311/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:537bbcb71f2ca58a96edea530f6b5f05ed36f594640ad73ba45da2c4ba63a4ac
|
3 |
+
size 2506176112
|
last-checkpoint/global_step6311/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c714754e6c8f8ef3f81965b74c1d37905a2dcd11a4942468b05dada83c63829e
|
3 |
+
size 2506176112
|
last-checkpoint/global_step6311/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3d0800d4f8b21fa8588dc1d10c8c2a4f8f3f905343d12a9ae3a6fca9ba22f61e
|
3 |
+
size 2506176112
|
last-checkpoint/global_step6311/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:af698d0c6a7367349e9bb5f732a10dbe3e65bd62e6f7c19dceaa8c7eb53d63f1
|
3 |
+
size 85570
|
last-checkpoint/global_step6311/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:858e5e94768ad6f67b1fb3868dd116f4d8883c9770589981d581b40df4a97098
|
3 |
+
size 85506
|
last-checkpoint/global_step6311/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8d203316b3f06902043bcb35792974b70921fce39bb38b45cd7cd2e8559d7b3d
|
3 |
+
size 85506
|
last-checkpoint/global_step6311/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dcc85a051658c2e2cf8843f05040e6207f3a9824e6eb392914d967306e1cc2d7
|
3 |
+
size 85506
|
last-checkpoint/global_step6311/zero_pp_rank_4_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ff794e200e434a6beb015ae1d4143bf6d1148d6c47b39da39e72e337662878a2
|
3 |
+
size 85506
|
last-checkpoint/global_step6311/zero_pp_rank_5_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ffb9529a96a97431f16deb193473953e218d01c6f842bdfcfa6f37707a22cc88
|
3 |
+
size 85506
|
last-checkpoint/global_step6311/zero_pp_rank_6_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:faa1b1523feff7fa82521ad7403b9e750238f0dc56ba883bfe6bd9a5cb05a21b
|
3 |
+
size 85506
|
last-checkpoint/global_step6311/zero_pp_rank_7_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2372cf41d29fbbd2d4204983371bbdc350d5ee42cc07209ffbeea2c756d98ff0
|
3 |
+
size 85506
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step6311
|
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4945242264
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1a4166953b305647bfe20e1bbeb15a19c28014fe214420eadf6709470c2313a3
|
3 |
size 4945242264
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 67121608
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:60ab1e0ab13e8465a7f0033de41b00c3ef85f0686c27f26ccedd41583cf589de
|
3 |
size 67121608
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:af2aa29a1a0077819b6e6c9858b2870298aef9379363892f7f4a488e66c5bd38
|
3 |
size 15984
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:291da2a884d82312ccaa711af98beaa52d4f41499984405613bb21b5148565a2
|
3 |
size 15984
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f1bbb7042adc0fc2a1ac7bf08c10090a51f5d0491c80cb36a3e5f1380294d890
|
3 |
size 15984
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:70f0dc59a31366ea078c9fd119c2fc25b442df27e88be4fecef8251ec325566a
|
3 |
size 15984
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:26207c8fd2f08c67b253e0d4b8b67c73971a41b870b2d0d9e52e1388c89513b5
|
3 |
size 15984
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac396bf6594a215db06382f8b0dfcb2360c2c6b1a95d8150fec6afd800852574
|
3 |
size 15984
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c2edf36543f3b923b139f3541d6d94d5a2d50de85da08b18fa6867198430e57c
|
3 |
size 15984
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f96eea372344884b68478842af038d9832fda66fa5d46d28035e601a7834efd4
|
3 |
size 15984
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5ac6a779be83142d0fb2f1a0e7950874d368e5e14f0bcab557da3d6573142406
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -9187,6 +9187,306 @@
|
|
9187 |
"rewards/margins": 0.5498504042625427,
|
9188 |
"rewards/rejected": -0.34040600061416626,
|
9189 |
"step": 6110
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9190 |
}
|
9191 |
],
|
9192 |
"logging_steps": 10,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 32.0,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 6311,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
9187 |
"rewards/margins": 0.5498504042625427,
|
9188 |
"rewards/rejected": -0.34040600061416626,
|
9189 |
"step": 6110
|
9190 |
+
},
|
9191 |
+
{
|
9192 |
+
"epoch": 31.03291139240506,
|
9193 |
+
"grad_norm": 111295.73044950665,
|
9194 |
+
"learning_rate": 7.615167659041052e-08,
|
9195 |
+
"logits/chosen": -0.7748550772666931,
|
9196 |
+
"logits/rejected": -0.973538875579834,
|
9197 |
+
"logps/chosen": -31.6827335357666,
|
9198 |
+
"logps/rejected": -594.8641357421875,
|
9199 |
+
"loss": 11721.4203,
|
9200 |
+
"rewards/accuracies": 0.987500011920929,
|
9201 |
+
"rewards/chosen": 0.20987281203269958,
|
9202 |
+
"rewards/margins": 0.5614258050918579,
|
9203 |
+
"rewards/rejected": -0.3515530228614807,
|
9204 |
+
"step": 6120
|
9205 |
+
},
|
9206 |
+
{
|
9207 |
+
"epoch": 31.083544303797467,
|
9208 |
+
"grad_norm": 132943.3056647964,
|
9209 |
+
"learning_rate": 7.536822312754621e-08,
|
9210 |
+
"logits/chosen": -2.017181396484375,
|
9211 |
+
"logits/rejected": -1.8383163213729858,
|
9212 |
+
"logps/chosen": -32.51802062988281,
|
9213 |
+
"logps/rejected": -609.6942138671875,
|
9214 |
+
"loss": 12392.7875,
|
9215 |
+
"rewards/accuracies": 1.0,
|
9216 |
+
"rewards/chosen": 0.22407253086566925,
|
9217 |
+
"rewards/margins": 0.582473874092102,
|
9218 |
+
"rewards/rejected": -0.3584012985229492,
|
9219 |
+
"step": 6130
|
9220 |
+
},
|
9221 |
+
{
|
9222 |
+
"epoch": 31.134177215189872,
|
9223 |
+
"grad_norm": 174931.96319021285,
|
9224 |
+
"learning_rate": 7.45847696646819e-08,
|
9225 |
+
"logits/chosen": -0.5535727143287659,
|
9226 |
+
"logits/rejected": 0.6218046545982361,
|
9227 |
+
"logps/chosen": -26.1910457611084,
|
9228 |
+
"logps/rejected": -551.5840454101562,
|
9229 |
+
"loss": 11699.3109,
|
9230 |
+
"rewards/accuracies": 0.987500011920929,
|
9231 |
+
"rewards/chosen": 0.20296287536621094,
|
9232 |
+
"rewards/margins": 0.5298973917961121,
|
9233 |
+
"rewards/rejected": -0.3269345760345459,
|
9234 |
+
"step": 6140
|
9235 |
+
},
|
9236 |
+
{
|
9237 |
+
"epoch": 31.184810126582278,
|
9238 |
+
"grad_norm": 168688.32644125135,
|
9239 |
+
"learning_rate": 7.380131620181761e-08,
|
9240 |
+
"logits/chosen": -1.008988618850708,
|
9241 |
+
"logits/rejected": -0.2778696119785309,
|
9242 |
+
"logps/chosen": -33.33096694946289,
|
9243 |
+
"logps/rejected": -607.976806640625,
|
9244 |
+
"loss": 11916.4016,
|
9245 |
+
"rewards/accuracies": 0.987500011920929,
|
9246 |
+
"rewards/chosen": 0.21690383553504944,
|
9247 |
+
"rewards/margins": 0.5754967331886292,
|
9248 |
+
"rewards/rejected": -0.3585929274559021,
|
9249 |
+
"step": 6150
|
9250 |
+
},
|
9251 |
+
{
|
9252 |
+
"epoch": 31.235443037974683,
|
9253 |
+
"grad_norm": 94661.132576451,
|
9254 |
+
"learning_rate": 7.30178627389533e-08,
|
9255 |
+
"logits/chosen": -3.0997250080108643,
|
9256 |
+
"logits/rejected": -2.1219401359558105,
|
9257 |
+
"logps/chosen": -27.209686279296875,
|
9258 |
+
"logps/rejected": -589.7662353515625,
|
9259 |
+
"loss": 12111.9188,
|
9260 |
+
"rewards/accuracies": 1.0,
|
9261 |
+
"rewards/chosen": 0.21759450435638428,
|
9262 |
+
"rewards/margins": 0.5674911737442017,
|
9263 |
+
"rewards/rejected": -0.3498966693878174,
|
9264 |
+
"step": 6160
|
9265 |
+
},
|
9266 |
+
{
|
9267 |
+
"epoch": 31.28607594936709,
|
9268 |
+
"grad_norm": 129537.98682999605,
|
9269 |
+
"learning_rate": 7.2234409276089e-08,
|
9270 |
+
"logits/chosen": -2.1777210235595703,
|
9271 |
+
"logits/rejected": -2.1664652824401855,
|
9272 |
+
"logps/chosen": -29.21515464782715,
|
9273 |
+
"logps/rejected": -575.5145263671875,
|
9274 |
+
"loss": 12396.4562,
|
9275 |
+
"rewards/accuracies": 0.987500011920929,
|
9276 |
+
"rewards/chosen": 0.20168697834014893,
|
9277 |
+
"rewards/margins": 0.5468615293502808,
|
9278 |
+
"rewards/rejected": -0.3451746106147766,
|
9279 |
+
"step": 6170
|
9280 |
+
},
|
9281 |
+
{
|
9282 |
+
"epoch": 31.336708860759494,
|
9283 |
+
"grad_norm": 146320.37748909468,
|
9284 |
+
"learning_rate": 7.145095581322469e-08,
|
9285 |
+
"logits/chosen": -0.37119048833847046,
|
9286 |
+
"logits/rejected": -0.12678974866867065,
|
9287 |
+
"logps/chosen": -27.464313507080078,
|
9288 |
+
"logps/rejected": -583.199462890625,
|
9289 |
+
"loss": 12035.1789,
|
9290 |
+
"rewards/accuracies": 1.0,
|
9291 |
+
"rewards/chosen": 0.20687448978424072,
|
9292 |
+
"rewards/margins": 0.5559764504432678,
|
9293 |
+
"rewards/rejected": -0.3491020202636719,
|
9294 |
+
"step": 6180
|
9295 |
+
},
|
9296 |
+
{
|
9297 |
+
"epoch": 31.3873417721519,
|
9298 |
+
"grad_norm": 123464.43072965978,
|
9299 |
+
"learning_rate": 7.066750235036038e-08,
|
9300 |
+
"logits/chosen": -1.114485740661621,
|
9301 |
+
"logits/rejected": -0.36546590924263,
|
9302 |
+
"logps/chosen": -24.96463394165039,
|
9303 |
+
"logps/rejected": -573.1627197265625,
|
9304 |
+
"loss": 12102.0078,
|
9305 |
+
"rewards/accuracies": 1.0,
|
9306 |
+
"rewards/chosen": 0.2025957852602005,
|
9307 |
+
"rewards/margins": 0.5483575463294983,
|
9308 |
+
"rewards/rejected": -0.3457617163658142,
|
9309 |
+
"step": 6190
|
9310 |
+
},
|
9311 |
+
{
|
9312 |
+
"epoch": 31.437974683544304,
|
9313 |
+
"grad_norm": 182155.23164206932,
|
9314 |
+
"learning_rate": 6.988404888749608e-08,
|
9315 |
+
"logits/chosen": -1.7520939111709595,
|
9316 |
+
"logits/rejected": -1.4854246377944946,
|
9317 |
+
"logps/chosen": -29.002777099609375,
|
9318 |
+
"logps/rejected": -592.4381713867188,
|
9319 |
+
"loss": 11423.6828,
|
9320 |
+
"rewards/accuracies": 1.0,
|
9321 |
+
"rewards/chosen": 0.2187313735485077,
|
9322 |
+
"rewards/margins": 0.5657260417938232,
|
9323 |
+
"rewards/rejected": -0.34699463844299316,
|
9324 |
+
"step": 6200
|
9325 |
+
},
|
9326 |
+
{
|
9327 |
+
"epoch": 31.48860759493671,
|
9328 |
+
"grad_norm": 148737.16455364344,
|
9329 |
+
"learning_rate": 6.910059542463177e-08,
|
9330 |
+
"logits/chosen": 0.025389552116394043,
|
9331 |
+
"logits/rejected": -0.27969443798065186,
|
9332 |
+
"logps/chosen": -17.67035675048828,
|
9333 |
+
"logps/rejected": -546.9998168945312,
|
9334 |
+
"loss": 11498.325,
|
9335 |
+
"rewards/accuracies": 0.987500011920929,
|
9336 |
+
"rewards/chosen": 0.19227565824985504,
|
9337 |
+
"rewards/margins": 0.5237180590629578,
|
9338 |
+
"rewards/rejected": -0.33144229650497437,
|
9339 |
+
"step": 6210
|
9340 |
+
},
|
9341 |
+
{
|
9342 |
+
"epoch": 31.539240506329115,
|
9343 |
+
"grad_norm": 186784.06647045226,
|
9344 |
+
"learning_rate": 6.831714196176746e-08,
|
9345 |
+
"logits/chosen": -3.0769848823547363,
|
9346 |
+
"logits/rejected": -2.87144136428833,
|
9347 |
+
"logps/chosen": -25.640066146850586,
|
9348 |
+
"logps/rejected": -605.6832885742188,
|
9349 |
+
"loss": 11701.2086,
|
9350 |
+
"rewards/accuracies": 1.0,
|
9351 |
+
"rewards/chosen": 0.21926145255565643,
|
9352 |
+
"rewards/margins": 0.5798953771591187,
|
9353 |
+
"rewards/rejected": -0.3606340289115906,
|
9354 |
+
"step": 6220
|
9355 |
+
},
|
9356 |
+
{
|
9357 |
+
"epoch": 31.58987341772152,
|
9358 |
+
"grad_norm": 108314.28535819704,
|
9359 |
+
"learning_rate": 6.753368849890315e-08,
|
9360 |
+
"logits/chosen": -0.5384847521781921,
|
9361 |
+
"logits/rejected": -0.6974294781684875,
|
9362 |
+
"logps/chosen": -26.830814361572266,
|
9363 |
+
"logps/rejected": -587.5255126953125,
|
9364 |
+
"loss": 11231.8016,
|
9365 |
+
"rewards/accuracies": 0.9750000238418579,
|
9366 |
+
"rewards/chosen": 0.20746394991874695,
|
9367 |
+
"rewards/margins": 0.557998776435852,
|
9368 |
+
"rewards/rejected": -0.3505348265171051,
|
9369 |
+
"step": 6230
|
9370 |
+
},
|
9371 |
+
{
|
9372 |
+
"epoch": 31.640506329113926,
|
9373 |
+
"grad_norm": 197387.20948770002,
|
9374 |
+
"learning_rate": 6.675023503603886e-08,
|
9375 |
+
"logits/chosen": -0.6654781103134155,
|
9376 |
+
"logits/rejected": -1.1572941541671753,
|
9377 |
+
"logps/chosen": -27.918231964111328,
|
9378 |
+
"logps/rejected": -592.8441162109375,
|
9379 |
+
"loss": 11850.0031,
|
9380 |
+
"rewards/accuracies": 1.0,
|
9381 |
+
"rewards/chosen": 0.21056847274303436,
|
9382 |
+
"rewards/margins": 0.5671868920326233,
|
9383 |
+
"rewards/rejected": -0.3566184341907501,
|
9384 |
+
"step": 6240
|
9385 |
+
},
|
9386 |
+
{
|
9387 |
+
"epoch": 31.691139240506327,
|
9388 |
+
"grad_norm": 178129.00858003844,
|
9389 |
+
"learning_rate": 6.596678157317455e-08,
|
9390 |
+
"logits/chosen": 0.17990253865718842,
|
9391 |
+
"logits/rejected": 0.15132752060890198,
|
9392 |
+
"logps/chosen": -26.486125946044922,
|
9393 |
+
"logps/rejected": -577.5296020507812,
|
9394 |
+
"loss": 12025.9992,
|
9395 |
+
"rewards/accuracies": 0.987500011920929,
|
9396 |
+
"rewards/chosen": 0.1961621642112732,
|
9397 |
+
"rewards/margins": 0.5460348725318909,
|
9398 |
+
"rewards/rejected": -0.34987273812294006,
|
9399 |
+
"step": 6250
|
9400 |
+
},
|
9401 |
+
{
|
9402 |
+
"epoch": 31.741772151898733,
|
9403 |
+
"grad_norm": 113204.1607298857,
|
9404 |
+
"learning_rate": 6.518332811031025e-08,
|
9405 |
+
"logits/chosen": -0.7701491117477417,
|
9406 |
+
"logits/rejected": -0.5652084946632385,
|
9407 |
+
"logps/chosen": -30.580230712890625,
|
9408 |
+
"logps/rejected": -575.9344482421875,
|
9409 |
+
"loss": 12611.7422,
|
9410 |
+
"rewards/accuracies": 0.987500011920929,
|
9411 |
+
"rewards/chosen": 0.20266905426979065,
|
9412 |
+
"rewards/margins": 0.5443531274795532,
|
9413 |
+
"rewards/rejected": -0.34168410301208496,
|
9414 |
+
"step": 6260
|
9415 |
+
},
|
9416 |
+
{
|
9417 |
+
"epoch": 31.792405063291138,
|
9418 |
+
"grad_norm": 170084.77349090017,
|
9419 |
+
"learning_rate": 6.439987464744594e-08,
|
9420 |
+
"logits/chosen": 0.8593052625656128,
|
9421 |
+
"logits/rejected": 1.1197197437286377,
|
9422 |
+
"logps/chosen": -26.577016830444336,
|
9423 |
+
"logps/rejected": -555.6820068359375,
|
9424 |
+
"loss": 12234.5422,
|
9425 |
+
"rewards/accuracies": 0.987500011920929,
|
9426 |
+
"rewards/chosen": 0.19716337323188782,
|
9427 |
+
"rewards/margins": 0.5328342318534851,
|
9428 |
+
"rewards/rejected": -0.33567091822624207,
|
9429 |
+
"step": 6270
|
9430 |
+
},
|
9431 |
+
{
|
9432 |
+
"epoch": 31.843037974683543,
|
9433 |
+
"grad_norm": 235274.58346107465,
|
9434 |
+
"learning_rate": 6.361642118458163e-08,
|
9435 |
+
"logits/chosen": -1.7307960987091064,
|
9436 |
+
"logits/rejected": -1.3535115718841553,
|
9437 |
+
"logps/chosen": -23.92806625366211,
|
9438 |
+
"logps/rejected": -565.2352294921875,
|
9439 |
+
"loss": 12517.5156,
|
9440 |
+
"rewards/accuracies": 0.987500011920929,
|
9441 |
+
"rewards/chosen": 0.2007029801607132,
|
9442 |
+
"rewards/margins": 0.5428507924079895,
|
9443 |
+
"rewards/rejected": -0.3421478271484375,
|
9444 |
+
"step": 6280
|
9445 |
+
},
|
9446 |
+
{
|
9447 |
+
"epoch": 31.89367088607595,
|
9448 |
+
"grad_norm": 190203.888446938,
|
9449 |
+
"learning_rate": 6.283296772171732e-08,
|
9450 |
+
"logits/chosen": -0.9662951231002808,
|
9451 |
+
"logits/rejected": -0.45983943343162537,
|
9452 |
+
"logps/chosen": -26.488794326782227,
|
9453 |
+
"logps/rejected": -565.1602783203125,
|
9454 |
+
"loss": 12050.4156,
|
9455 |
+
"rewards/accuracies": 0.987500011920929,
|
9456 |
+
"rewards/chosen": 0.20456723868846893,
|
9457 |
+
"rewards/margins": 0.5392366051673889,
|
9458 |
+
"rewards/rejected": -0.3346693515777588,
|
9459 |
+
"step": 6290
|
9460 |
+
},
|
9461 |
+
{
|
9462 |
+
"epoch": 31.944303797468354,
|
9463 |
+
"grad_norm": 169175.47682307824,
|
9464 |
+
"learning_rate": 6.204951425885302e-08,
|
9465 |
+
"logits/chosen": -1.9982364177703857,
|
9466 |
+
"logits/rejected": -1.282958745956421,
|
9467 |
+
"logps/chosen": -25.263113021850586,
|
9468 |
+
"logps/rejected": -584.7576293945312,
|
9469 |
+
"loss": 11806.3297,
|
9470 |
+
"rewards/accuracies": 1.0,
|
9471 |
+
"rewards/chosen": 0.20914848148822784,
|
9472 |
+
"rewards/margins": 0.5613822937011719,
|
9473 |
+
"rewards/rejected": -0.35223376750946045,
|
9474 |
+
"step": 6300
|
9475 |
+
},
|
9476 |
+
{
|
9477 |
+
"epoch": 31.99493670886076,
|
9478 |
+
"grad_norm": 142938.702725119,
|
9479 |
+
"learning_rate": 6.126606079598871e-08,
|
9480 |
+
"logits/chosen": -2.084618091583252,
|
9481 |
+
"logits/rejected": -1.6745023727416992,
|
9482 |
+
"logps/chosen": -24.918956756591797,
|
9483 |
+
"logps/rejected": -603.4859619140625,
|
9484 |
+
"loss": 12022.5133,
|
9485 |
+
"rewards/accuracies": 1.0,
|
9486 |
+
"rewards/chosen": 0.21575181186199188,
|
9487 |
+
"rewards/margins": 0.5819977521896362,
|
9488 |
+
"rewards/rejected": -0.36624595522880554,
|
9489 |
+
"step": 6310
|
9490 |
}
|
9491 |
],
|
9492 |
"logging_steps": 10,
|