cat-searcher
commited on
Commit
•
39281e8
1
Parent(s):
d679692
Training in progress, epoch 30, checkpoint
Browse files- last-checkpoint/global_step6113/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step6113/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step6113/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step6113/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step6113/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step6113/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step6113/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step6113/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step6113/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step6113/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step6113/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step6113/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step6113/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step6113/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step6113/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step6113/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/model-00001-of-00002.safetensors +1 -1
- last-checkpoint/model-00002-of-00002.safetensors +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +302 -2
last-checkpoint/global_step6113/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0499858123b31f7a4204d087c039e8726dc6ad4603df074753febcc04419561a
|
3 |
+
size 2506176112
|
last-checkpoint/global_step6113/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e72e84707d58bb67c0db60989f9c153473ac467cba53022b78d75a841fd9ab53
|
3 |
+
size 2506176112
|
last-checkpoint/global_step6113/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fdf672e0ab53201eff10260483f20107ede551a0770ffa18e03d8cf6f9fa4bb8
|
3 |
+
size 2506176112
|
last-checkpoint/global_step6113/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7e65dea90b6ebad258ebeb2480617854af6fb764c69f32a09290d8098b409ffa
|
3 |
+
size 2506176112
|
last-checkpoint/global_step6113/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac88c49031eafd7094a57d19a07adcfab7e8035bf8f4fce77718cd7ad6881af9
|
3 |
+
size 2506176112
|
last-checkpoint/global_step6113/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7c04cd0d89bc49c2d22736060318f6bd9a096c0bb6aa02df4860ea10b269624c
|
3 |
+
size 2506176112
|
last-checkpoint/global_step6113/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4d6d7ca8d8feae576757708b7a34c4018a12ee540735d04c0c8c1b3de1489bf4
|
3 |
+
size 2506176112
|
last-checkpoint/global_step6113/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b84296edebd96febb1ac999c83a53f11cf291da4cbe9d06d3d2c319e1eabd737
|
3 |
+
size 2506176112
|
last-checkpoint/global_step6113/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:561724fd38716552727a57affc0c5d116f64c8e53da7c31f5b707480dfd32bb9
|
3 |
+
size 85570
|
last-checkpoint/global_step6113/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a8064a387c0abdcce514d00b06b69a41e661ce183e377f111f476ca71082d97f
|
3 |
+
size 85506
|
last-checkpoint/global_step6113/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0191b2ffba6a0b1a7fca25147384ea4b659723c25754ef7916d3cd7d62aa692b
|
3 |
+
size 85506
|
last-checkpoint/global_step6113/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:66690d8256e090f00ce5ead20a2bde13e0e3ed458334da20baf9b28dcc486813
|
3 |
+
size 85506
|
last-checkpoint/global_step6113/zero_pp_rank_4_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:df4ee2a67dc3236ef4d1dd231d6108de62d9c6c7105ddf2d1dc172d972f4a138
|
3 |
+
size 85506
|
last-checkpoint/global_step6113/zero_pp_rank_5_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:38355ae4214f9d9d2b1a486a99713a7ab23cdb8f2d880e01fbe59be80673fff1
|
3 |
+
size 85506
|
last-checkpoint/global_step6113/zero_pp_rank_6_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:96454f7b3634ce62258c7197cf4b4befbf6c5f3323c69fd38b68cab2b2a39bd1
|
3 |
+
size 85506
|
last-checkpoint/global_step6113/zero_pp_rank_7_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e95f4cf5fdb61c35b72378382a3517094d3e980be021c62c73e593c994774f6f
|
3 |
+
size 85506
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step6113
|
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4945242264
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8a98e86a0252e0053a50f39ee5da1bc91fc6cc770c00bdf4879a07beb1bfb774
|
3 |
size 4945242264
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 67121608
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:90d998420cc3a03c1798986731cd33f00d6a7cd27ade984be3af7b6e9ba76532
|
3 |
size 67121608
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4ac3887e0b6e47ee941f0099109d55fe8c4958125034b41513ac29b0a09c9e86
|
3 |
size 15984
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:de0272654fc4c603ac86d1e7e8bda566a95de507f7e48193f810ed9d664b308b
|
3 |
size 15984
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:09a612c5aff75737f917d89b930ca876fddca4236d73b7c4e2122a374a8c7279
|
3 |
size 15984
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:15d130c7a2df3aa45929d007a20cc7aee23015b41b9d24357c85880be9954c25
|
3 |
size 15984
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6c0852f2ccb050a5008bb4e71f4b3a6ea8b845e136589936cf10d03133438507
|
3 |
size 15984
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3cad3ab70f668806214a1537af9f7d2fc6de4401f7eec2f544f9059052862cb5
|
3 |
size 15984
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:57dc573e9fb6a6cc583ada6a4737e432c792b6361f3eb47de697d69b75f09d21
|
3 |
size 15984
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c46a2fd5508f6b24b7fd294db0c5ae6e928c7174f4e3caeeeeca276dd88690c5
|
3 |
size 15984
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:12fdc5d527314ffb69c899c4aaadd4582bc4cb2067e14fecb7c5f8dd01262d7f
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 30.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -8887,6 +8887,306 @@
|
|
8887 |
"rewards/margins": 0.5534237027168274,
|
8888 |
"rewards/rejected": -0.34499144554138184,
|
8889 |
"step": 5910
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8890 |
}
|
8891 |
],
|
8892 |
"logging_steps": 10,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 30.99746835443038,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 6113,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
8887 |
"rewards/margins": 0.5534237027168274,
|
8888 |
"rewards/rejected": -0.34499144554138184,
|
8889 |
"step": 5910
|
8890 |
+
},
|
8891 |
+
{
|
8892 |
+
"epoch": 30.020253164556962,
|
8893 |
+
"grad_norm": 218071.18905642498,
|
8894 |
+
"learning_rate": 9.182074584769664e-08,
|
8895 |
+
"logits/chosen": -0.20139971375465393,
|
8896 |
+
"logits/rejected": 0.6374796628952026,
|
8897 |
+
"logps/chosen": -36.04420852661133,
|
8898 |
+
"logps/rejected": -585.3655395507812,
|
8899 |
+
"loss": 12139.8164,
|
8900 |
+
"rewards/accuracies": 0.987500011920929,
|
8901 |
+
"rewards/chosen": 0.20962996780872345,
|
8902 |
+
"rewards/margins": 0.5547569394111633,
|
8903 |
+
"rewards/rejected": -0.3451269865036011,
|
8904 |
+
"step": 5920
|
8905 |
+
},
|
8906 |
+
{
|
8907 |
+
"epoch": 30.070886075949367,
|
8908 |
+
"grad_norm": 199502.22634833233,
|
8909 |
+
"learning_rate": 9.103729238483233e-08,
|
8910 |
+
"logits/chosen": -0.5093935132026672,
|
8911 |
+
"logits/rejected": -0.9036226272583008,
|
8912 |
+
"logps/chosen": -32.292423248291016,
|
8913 |
+
"logps/rejected": -584.6748046875,
|
8914 |
+
"loss": 11463.5555,
|
8915 |
+
"rewards/accuracies": 1.0,
|
8916 |
+
"rewards/chosen": 0.20751234889030457,
|
8917 |
+
"rewards/margins": 0.5532687902450562,
|
8918 |
+
"rewards/rejected": -0.345756471157074,
|
8919 |
+
"step": 5930
|
8920 |
+
},
|
8921 |
+
{
|
8922 |
+
"epoch": 30.121518987341773,
|
8923 |
+
"grad_norm": 164683.94241544002,
|
8924 |
+
"learning_rate": 9.025383892196802e-08,
|
8925 |
+
"logits/chosen": -1.2027417421340942,
|
8926 |
+
"logits/rejected": -0.21418258547782898,
|
8927 |
+
"logps/chosen": -38.00572967529297,
|
8928 |
+
"logps/rejected": -542.5321044921875,
|
8929 |
+
"loss": 12248.6938,
|
8930 |
+
"rewards/accuracies": 0.9624999761581421,
|
8931 |
+
"rewards/chosen": 0.1922551691532135,
|
8932 |
+
"rewards/margins": 0.5097079277038574,
|
8933 |
+
"rewards/rejected": -0.3174527585506439,
|
8934 |
+
"step": 5940
|
8935 |
+
},
|
8936 |
+
{
|
8937 |
+
"epoch": 30.172151898734178,
|
8938 |
+
"grad_norm": 209885.7696817789,
|
8939 |
+
"learning_rate": 8.947038545910373e-08,
|
8940 |
+
"logits/chosen": -0.5836046934127808,
|
8941 |
+
"logits/rejected": -0.049278389662504196,
|
8942 |
+
"logps/chosen": -26.44875144958496,
|
8943 |
+
"logps/rejected": -577.2633056640625,
|
8944 |
+
"loss": 11882.8156,
|
8945 |
+
"rewards/accuracies": 1.0,
|
8946 |
+
"rewards/chosen": 0.2028985321521759,
|
8947 |
+
"rewards/margins": 0.548152506351471,
|
8948 |
+
"rewards/rejected": -0.34525397419929504,
|
8949 |
+
"step": 5950
|
8950 |
+
},
|
8951 |
+
{
|
8952 |
+
"epoch": 30.222784810126583,
|
8953 |
+
"grad_norm": 116064.20956709805,
|
8954 |
+
"learning_rate": 8.868693199623942e-08,
|
8955 |
+
"logits/chosen": -0.3441212773323059,
|
8956 |
+
"logits/rejected": 0.3469446897506714,
|
8957 |
+
"logps/chosen": -29.866031646728516,
|
8958 |
+
"logps/rejected": -576.8469848632812,
|
8959 |
+
"loss": 11899.9906,
|
8960 |
+
"rewards/accuracies": 0.987500011920929,
|
8961 |
+
"rewards/chosen": 0.20657262206077576,
|
8962 |
+
"rewards/margins": 0.5527979731559753,
|
8963 |
+
"rewards/rejected": -0.3462253212928772,
|
8964 |
+
"step": 5960
|
8965 |
+
},
|
8966 |
+
{
|
8967 |
+
"epoch": 30.27341772151899,
|
8968 |
+
"grad_norm": 213446.8577722312,
|
8969 |
+
"learning_rate": 8.790347853337511e-08,
|
8970 |
+
"logits/chosen": -1.195245623588562,
|
8971 |
+
"logits/rejected": -1.5595389604568481,
|
8972 |
+
"logps/chosen": -26.48971939086914,
|
8973 |
+
"logps/rejected": -562.9793090820312,
|
8974 |
+
"loss": 12288.4188,
|
8975 |
+
"rewards/accuracies": 0.9624999761581421,
|
8976 |
+
"rewards/chosen": 0.19797027111053467,
|
8977 |
+
"rewards/margins": 0.5365854501724243,
|
8978 |
+
"rewards/rejected": -0.3386152386665344,
|
8979 |
+
"step": 5970
|
8980 |
+
},
|
8981 |
+
{
|
8982 |
+
"epoch": 30.324050632911394,
|
8983 |
+
"grad_norm": 150392.6550831942,
|
8984 |
+
"learning_rate": 8.712002507051081e-08,
|
8985 |
+
"logits/chosen": -0.636971116065979,
|
8986 |
+
"logits/rejected": -0.8326961398124695,
|
8987 |
+
"logps/chosen": -31.82355308532715,
|
8988 |
+
"logps/rejected": -572.6309814453125,
|
8989 |
+
"loss": 11735.9594,
|
8990 |
+
"rewards/accuracies": 0.9750000238418579,
|
8991 |
+
"rewards/chosen": 0.20741339027881622,
|
8992 |
+
"rewards/margins": 0.5429075360298157,
|
8993 |
+
"rewards/rejected": -0.33549413084983826,
|
8994 |
+
"step": 5980
|
8995 |
+
},
|
8996 |
+
{
|
8997 |
+
"epoch": 30.374683544303796,
|
8998 |
+
"grad_norm": 248873.00017903763,
|
8999 |
+
"learning_rate": 8.63365716076465e-08,
|
9000 |
+
"logits/chosen": -0.8763412237167358,
|
9001 |
+
"logits/rejected": -0.38471752405166626,
|
9002 |
+
"logps/chosen": -33.753509521484375,
|
9003 |
+
"logps/rejected": -577.4928588867188,
|
9004 |
+
"loss": 11981.0336,
|
9005 |
+
"rewards/accuracies": 0.9750000238418579,
|
9006 |
+
"rewards/chosen": 0.20298103988170624,
|
9007 |
+
"rewards/margins": 0.5445905923843384,
|
9008 |
+
"rewards/rejected": -0.34160953760147095,
|
9009 |
+
"step": 5990
|
9010 |
+
},
|
9011 |
+
{
|
9012 |
+
"epoch": 30.4253164556962,
|
9013 |
+
"grad_norm": 247123.70966936232,
|
9014 |
+
"learning_rate": 8.555311814478219e-08,
|
9015 |
+
"logits/chosen": -1.4638581275939941,
|
9016 |
+
"logits/rejected": -1.6560137271881104,
|
9017 |
+
"logps/chosen": -27.02420425415039,
|
9018 |
+
"logps/rejected": -579.2672119140625,
|
9019 |
+
"loss": 12743.5711,
|
9020 |
+
"rewards/accuracies": 0.987500011920929,
|
9021 |
+
"rewards/chosen": 0.20767991244792938,
|
9022 |
+
"rewards/margins": 0.5485936403274536,
|
9023 |
+
"rewards/rejected": -0.3409137427806854,
|
9024 |
+
"step": 6000
|
9025 |
+
},
|
9026 |
+
{
|
9027 |
+
"epoch": 30.475949367088607,
|
9028 |
+
"grad_norm": 152247.200364489,
|
9029 |
+
"learning_rate": 8.476966468191789e-08,
|
9030 |
+
"logits/chosen": -1.2626516819000244,
|
9031 |
+
"logits/rejected": -1.3656198978424072,
|
9032 |
+
"logps/chosen": -30.586597442626953,
|
9033 |
+
"logps/rejected": -564.9951782226562,
|
9034 |
+
"loss": 12138.7328,
|
9035 |
+
"rewards/accuracies": 0.987500011920929,
|
9036 |
+
"rewards/chosen": 0.19354796409606934,
|
9037 |
+
"rewards/margins": 0.5365390181541443,
|
9038 |
+
"rewards/rejected": -0.34299105405807495,
|
9039 |
+
"step": 6010
|
9040 |
+
},
|
9041 |
+
{
|
9042 |
+
"epoch": 30.526582278481012,
|
9043 |
+
"grad_norm": 153551.3953399981,
|
9044 |
+
"learning_rate": 8.398621121905358e-08,
|
9045 |
+
"logits/chosen": -0.8625293970108032,
|
9046 |
+
"logits/rejected": -1.6173267364501953,
|
9047 |
+
"logps/chosen": -23.908416748046875,
|
9048 |
+
"logps/rejected": -591.5709228515625,
|
9049 |
+
"loss": 12247.5672,
|
9050 |
+
"rewards/accuracies": 1.0,
|
9051 |
+
"rewards/chosen": 0.2063622921705246,
|
9052 |
+
"rewards/margins": 0.5622067451477051,
|
9053 |
+
"rewards/rejected": -0.3558444678783417,
|
9054 |
+
"step": 6020
|
9055 |
+
},
|
9056 |
+
{
|
9057 |
+
"epoch": 30.577215189873417,
|
9058 |
+
"grad_norm": 247558.34356145174,
|
9059 |
+
"learning_rate": 8.320275775618927e-08,
|
9060 |
+
"logits/chosen": -0.6456829309463501,
|
9061 |
+
"logits/rejected": -0.25254157185554504,
|
9062 |
+
"logps/chosen": -30.65958023071289,
|
9063 |
+
"logps/rejected": -572.8914794921875,
|
9064 |
+
"loss": 11907.7406,
|
9065 |
+
"rewards/accuracies": 0.987500011920929,
|
9066 |
+
"rewards/chosen": 0.20363232493400574,
|
9067 |
+
"rewards/margins": 0.5450500845909119,
|
9068 |
+
"rewards/rejected": -0.34141772985458374,
|
9069 |
+
"step": 6030
|
9070 |
+
},
|
9071 |
+
{
|
9072 |
+
"epoch": 30.627848101265823,
|
9073 |
+
"grad_norm": 162045.80468301394,
|
9074 |
+
"learning_rate": 8.241930429332496e-08,
|
9075 |
+
"logits/chosen": 0.5193571448326111,
|
9076 |
+
"logits/rejected": 1.0150249004364014,
|
9077 |
+
"logps/chosen": -21.961605072021484,
|
9078 |
+
"logps/rejected": -586.28369140625,
|
9079 |
+
"loss": 11870.2594,
|
9080 |
+
"rewards/accuracies": 1.0,
|
9081 |
+
"rewards/chosen": 0.20082764327526093,
|
9082 |
+
"rewards/margins": 0.5631116032600403,
|
9083 |
+
"rewards/rejected": -0.36228394508361816,
|
9084 |
+
"step": 6040
|
9085 |
+
},
|
9086 |
+
{
|
9087 |
+
"epoch": 30.678481012658228,
|
9088 |
+
"grad_norm": 183677.73161043233,
|
9089 |
+
"learning_rate": 8.163585083046067e-08,
|
9090 |
+
"logits/chosen": -2.2690348625183105,
|
9091 |
+
"logits/rejected": -1.8725353479385376,
|
9092 |
+
"logps/chosen": -34.20100021362305,
|
9093 |
+
"logps/rejected": -571.948486328125,
|
9094 |
+
"loss": 11952.7477,
|
9095 |
+
"rewards/accuracies": 0.9750000238418579,
|
9096 |
+
"rewards/chosen": 0.2089938223361969,
|
9097 |
+
"rewards/margins": 0.542193591594696,
|
9098 |
+
"rewards/rejected": -0.33319979906082153,
|
9099 |
+
"step": 6050
|
9100 |
+
},
|
9101 |
+
{
|
9102 |
+
"epoch": 30.729113924050633,
|
9103 |
+
"grad_norm": 206299.51509471133,
|
9104 |
+
"learning_rate": 8.085239736759636e-08,
|
9105 |
+
"logits/chosen": -1.8886245489120483,
|
9106 |
+
"logits/rejected": -1.428289532661438,
|
9107 |
+
"logps/chosen": -33.442771911621094,
|
9108 |
+
"logps/rejected": -577.7710571289062,
|
9109 |
+
"loss": 12094.3477,
|
9110 |
+
"rewards/accuracies": 0.9750000238418579,
|
9111 |
+
"rewards/chosen": 0.20627331733703613,
|
9112 |
+
"rewards/margins": 0.5446707606315613,
|
9113 |
+
"rewards/rejected": -0.3383975028991699,
|
9114 |
+
"step": 6060
|
9115 |
+
},
|
9116 |
+
{
|
9117 |
+
"epoch": 30.77974683544304,
|
9118 |
+
"grad_norm": 178455.47052363763,
|
9119 |
+
"learning_rate": 8.006894390473206e-08,
|
9120 |
+
"logits/chosen": -0.9159374237060547,
|
9121 |
+
"logits/rejected": -0.5700797438621521,
|
9122 |
+
"logps/chosen": -22.003402709960938,
|
9123 |
+
"logps/rejected": -588.1246948242188,
|
9124 |
+
"loss": 12967.7109,
|
9125 |
+
"rewards/accuracies": 1.0,
|
9126 |
+
"rewards/chosen": 0.20624502003192902,
|
9127 |
+
"rewards/margins": 0.5602100491523743,
|
9128 |
+
"rewards/rejected": -0.35396507382392883,
|
9129 |
+
"step": 6070
|
9130 |
+
},
|
9131 |
+
{
|
9132 |
+
"epoch": 30.830379746835444,
|
9133 |
+
"grad_norm": 188994.549896938,
|
9134 |
+
"learning_rate": 7.928549044186775e-08,
|
9135 |
+
"logits/chosen": -2.218046188354492,
|
9136 |
+
"logits/rejected": -2.298725128173828,
|
9137 |
+
"logps/chosen": -36.601036071777344,
|
9138 |
+
"logps/rejected": -578.4909057617188,
|
9139 |
+
"loss": 11942.907,
|
9140 |
+
"rewards/accuracies": 0.987500011920929,
|
9141 |
+
"rewards/chosen": 0.20413891971111298,
|
9142 |
+
"rewards/margins": 0.5424162149429321,
|
9143 |
+
"rewards/rejected": -0.33827728033065796,
|
9144 |
+
"step": 6080
|
9145 |
+
},
|
9146 |
+
{
|
9147 |
+
"epoch": 30.88101265822785,
|
9148 |
+
"grad_norm": 226618.0916543629,
|
9149 |
+
"learning_rate": 7.850203697900344e-08,
|
9150 |
+
"logits/chosen": -0.8958581686019897,
|
9151 |
+
"logits/rejected": -0.3350396454334259,
|
9152 |
+
"logps/chosen": -27.914409637451172,
|
9153 |
+
"logps/rejected": -584.2742919921875,
|
9154 |
+
"loss": 12020.4094,
|
9155 |
+
"rewards/accuracies": 0.987500011920929,
|
9156 |
+
"rewards/chosen": 0.2059321403503418,
|
9157 |
+
"rewards/margins": 0.5559757947921753,
|
9158 |
+
"rewards/rejected": -0.3500436246395111,
|
9159 |
+
"step": 6090
|
9160 |
+
},
|
9161 |
+
{
|
9162 |
+
"epoch": 30.931645569620255,
|
9163 |
+
"grad_norm": 193720.76624447017,
|
9164 |
+
"learning_rate": 7.771858351613913e-08,
|
9165 |
+
"logits/chosen": -0.13203875720500946,
|
9166 |
+
"logits/rejected": -0.22968029975891113,
|
9167 |
+
"logps/chosen": -25.164508819580078,
|
9168 |
+
"logps/rejected": -573.2855224609375,
|
9169 |
+
"loss": 12096.6344,
|
9170 |
+
"rewards/accuracies": 0.987500011920929,
|
9171 |
+
"rewards/chosen": 0.1958034336566925,
|
9172 |
+
"rewards/margins": 0.5465744137763977,
|
9173 |
+
"rewards/rejected": -0.3507709503173828,
|
9174 |
+
"step": 6100
|
9175 |
+
},
|
9176 |
+
{
|
9177 |
+
"epoch": 30.98227848101266,
|
9178 |
+
"grad_norm": 177238.22636435836,
|
9179 |
+
"learning_rate": 7.693513005327483e-08,
|
9180 |
+
"logits/chosen": -2.0759382247924805,
|
9181 |
+
"logits/rejected": -1.4708411693572998,
|
9182 |
+
"logps/chosen": -28.522485733032227,
|
9183 |
+
"logps/rejected": -573.73681640625,
|
9184 |
+
"loss": 12017.775,
|
9185 |
+
"rewards/accuracies": 0.987500011920929,
|
9186 |
+
"rewards/chosen": 0.20944443345069885,
|
9187 |
+
"rewards/margins": 0.5498504042625427,
|
9188 |
+
"rewards/rejected": -0.34040600061416626,
|
9189 |
+
"step": 6110
|
9190 |
}
|
9191 |
],
|
9192 |
"logging_steps": 10,
|