cat-searcher
commited on
Commit
•
4656255
1
Parent(s):
e86348a
Training in progress, epoch 34, checkpoint
Browse files- last-checkpoint/global_step6706/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step6706/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step6706/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step6706/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step6706/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step6706/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step6706/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step6706/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step6706/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step6706/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step6706/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step6706/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step6706/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step6706/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step6706/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step6706/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/model-00001-of-00002.safetensors +1 -1
- last-checkpoint/model-00002-of-00002.safetensors +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +587 -2
last-checkpoint/global_step6706/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5b42c56b1327b90cb9abc534edcfbb08932410debcd7305c0d1759e04bdc4d11
|
3 |
+
size 2506176112
|
last-checkpoint/global_step6706/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9396272e9f609f397084631f7d4ffed08fff5c7a034e4194bfd0a7f72c70bfa1
|
3 |
+
size 2506176112
|
last-checkpoint/global_step6706/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:591f9c4ae58767f09106f21436522ad83280a312251aec9632f74f3c9a6169e3
|
3 |
+
size 2506176112
|
last-checkpoint/global_step6706/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:49135e96f11f14c0edf4c5dffe815b40ae94488d644743ef2dd9de99786ea8b7
|
3 |
+
size 2506176112
|
last-checkpoint/global_step6706/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f8f3f16c835dc750d8d496565e2bd2e880599b1ef853595956dcdd5d856fc3f2
|
3 |
+
size 2506176112
|
last-checkpoint/global_step6706/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a6c1e57d8cf90c74381abe81ec860ea539ed7f1005ee89c7ae0fca24ef4c060e
|
3 |
+
size 2506176112
|
last-checkpoint/global_step6706/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:544f3afbdf2e5758131bde2e146c11a13beea591ed9ffd5bdc5734faad8a1043
|
3 |
+
size 2506176112
|
last-checkpoint/global_step6706/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2a8e4440e7e8b59cfb3cc26a05b82c5d57ea5084e4017e3c33d0e483da159561
|
3 |
+
size 2506176112
|
last-checkpoint/global_step6706/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9ef8119caf8c39ec12de2099badeb63ad829ce60b1bfeeebb9e0517a09c0a976
|
3 |
+
size 85570
|
last-checkpoint/global_step6706/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a89445868a574a4e6ca673c951b38372184c1b6f771b3bfa85d93241a42213e9
|
3 |
+
size 85506
|
last-checkpoint/global_step6706/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7d31316c948c7d38bc67e1c7147275a977f4cc81870a86da11f10564b176127c
|
3 |
+
size 85506
|
last-checkpoint/global_step6706/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b50dcfc583c1b3c6bea42d40dad8d32b734dc8be0c79d7e09f61d63bf79cccb7
|
3 |
+
size 85506
|
last-checkpoint/global_step6706/zero_pp_rank_4_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4e31df5e5d1299642990ab63df3f37154cc9b1fabcfec91715a33296b7735163
|
3 |
+
size 85506
|
last-checkpoint/global_step6706/zero_pp_rank_5_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d6db17cba5932b9a1f9ea2964606a3e4054625b730789b3dd91f659e92c3486c
|
3 |
+
size 85506
|
last-checkpoint/global_step6706/zero_pp_rank_6_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:49e00c574c29ae1516a5c427bcb11052789980cfebadc115603615c994101ab7
|
3 |
+
size 85506
|
last-checkpoint/global_step6706/zero_pp_rank_7_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9d51ee3a89f5d4297c41d5ecd7accd0c7f05aae37c660466fceca4c94b0656c3
|
3 |
+
size 85506
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step6706
|
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4945242264
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6dccd3cd5529e5f163e359e3786831ba7375631404b7ee8058d8b4408e5f6c21
|
3 |
size 4945242264
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 67121608
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c84686bbc6149ef7c13cd1e65651e398a75147e6c2a6349adc73d12c6ffa69ab
|
3 |
size 67121608
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1824a3c32cb0f9c63783531dc708888703e1f1c3a24fcb1359a551096190eeb4
|
3 |
size 15984
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ef711a8d330873d0c4240280ee39d5aa7914332ddf901ffeea474d7a3676d68e
|
3 |
size 15984
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dc1c319ca557b1ad565c5491231c88b982788beb22edcc7662c65f10bac37a88
|
3 |
size 15984
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bda4459d526a1e94ec4135282c75d79950f5463b1f31627ef8f677d8a590cadd
|
3 |
size 15984
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c928635796240bfafd62cee0be31323babeadc7998190bc90de090567d3711fd
|
3 |
size 15984
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0e1e06f947b66a18ac20f7698fc3fe6fbf2a0fa9b5d6b3460904a385c167db64
|
3 |
size 15984
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0a5533c6799b2aca827472a96ea0b581da238e7b44cbde527a6a83e4244ea55b
|
3 |
size 15984
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9fd80eb1f10bbe498691d09f14b4f4a09102a449bc19f524f5b62f82768d3af5
|
3 |
size 15984
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1c25188d68c243cfb7b17f6acfbc1e55a94e4598b4924ebbd056f93551f70969
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -9487,6 +9487,591 @@
|
|
9487 |
"rewards/margins": 0.5819977521896362,
|
9488 |
"rewards/rejected": -0.36624595522880554,
|
9489 |
"step": 6310
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9490 |
}
|
9491 |
],
|
9492 |
"logging_steps": 10,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 34.0,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 6706,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
9487 |
"rewards/margins": 0.5819977521896362,
|
9488 |
"rewards/rejected": -0.36624595522880554,
|
9489 |
"step": 6310
|
9490 |
+
},
|
9491 |
+
{
|
9492 |
+
"epoch": 32.04556962025316,
|
9493 |
+
"grad_norm": 146925.77007874168,
|
9494 |
+
"learning_rate": 6.04826073331244e-08,
|
9495 |
+
"logits/chosen": -1.0771139860153198,
|
9496 |
+
"logits/rejected": -0.38963261246681213,
|
9497 |
+
"logps/chosen": -25.353687286376953,
|
9498 |
+
"logps/rejected": -599.3104248046875,
|
9499 |
+
"loss": 11649.7609,
|
9500 |
+
"rewards/accuracies": 1.0,
|
9501 |
+
"rewards/chosen": 0.21451549232006073,
|
9502 |
+
"rewards/margins": 0.5766840577125549,
|
9503 |
+
"rewards/rejected": -0.3621685206890106,
|
9504 |
+
"step": 6320
|
9505 |
+
},
|
9506 |
+
{
|
9507 |
+
"epoch": 32.09620253164557,
|
9508 |
+
"grad_norm": 94333.82344683389,
|
9509 |
+
"learning_rate": 5.96991538702601e-08,
|
9510 |
+
"logits/chosen": -2.162341356277466,
|
9511 |
+
"logits/rejected": -1.5530678033828735,
|
9512 |
+
"logps/chosen": -36.120880126953125,
|
9513 |
+
"logps/rejected": -594.9260864257812,
|
9514 |
+
"loss": 11919.4,
|
9515 |
+
"rewards/accuracies": 0.987500011920929,
|
9516 |
+
"rewards/chosen": 0.2106127291917801,
|
9517 |
+
"rewards/margins": 0.558625340461731,
|
9518 |
+
"rewards/rejected": -0.34801262617111206,
|
9519 |
+
"step": 6330
|
9520 |
+
},
|
9521 |
+
{
|
9522 |
+
"epoch": 32.14683544303797,
|
9523 |
+
"grad_norm": 144438.33677050017,
|
9524 |
+
"learning_rate": 5.8915700407395795e-08,
|
9525 |
+
"logits/chosen": -0.8229999542236328,
|
9526 |
+
"logits/rejected": -0.037537313997745514,
|
9527 |
+
"logps/chosen": -25.43358612060547,
|
9528 |
+
"logps/rejected": -557.636474609375,
|
9529 |
+
"loss": 11297.6063,
|
9530 |
+
"rewards/accuracies": 0.9750000238418579,
|
9531 |
+
"rewards/chosen": 0.1968574970960617,
|
9532 |
+
"rewards/margins": 0.532370388507843,
|
9533 |
+
"rewards/rejected": -0.33551284670829773,
|
9534 |
+
"step": 6340
|
9535 |
+
},
|
9536 |
+
{
|
9537 |
+
"epoch": 32.19746835443038,
|
9538 |
+
"grad_norm": 109693.94525690017,
|
9539 |
+
"learning_rate": 5.813224694453149e-08,
|
9540 |
+
"logits/chosen": -3.077913761138916,
|
9541 |
+
"logits/rejected": -2.4543375968933105,
|
9542 |
+
"logps/chosen": -26.92588233947754,
|
9543 |
+
"logps/rejected": -583.3746337890625,
|
9544 |
+
"loss": 12147.5016,
|
9545 |
+
"rewards/accuracies": 0.9750000238418579,
|
9546 |
+
"rewards/chosen": 0.21152964234352112,
|
9547 |
+
"rewards/margins": 0.5570891499519348,
|
9548 |
+
"rewards/rejected": -0.3455595374107361,
|
9549 |
+
"step": 6350
|
9550 |
+
},
|
9551 |
+
{
|
9552 |
+
"epoch": 32.24810126582278,
|
9553 |
+
"grad_norm": 94464.04824246689,
|
9554 |
+
"learning_rate": 5.734879348166719e-08,
|
9555 |
+
"logits/chosen": -0.08146251738071442,
|
9556 |
+
"logits/rejected": -0.1943734884262085,
|
9557 |
+
"logps/chosen": -38.933929443359375,
|
9558 |
+
"logps/rejected": -599.4444580078125,
|
9559 |
+
"loss": 11706.7859,
|
9560 |
+
"rewards/accuracies": 0.987500011920929,
|
9561 |
+
"rewards/chosen": 0.21242408454418182,
|
9562 |
+
"rewards/margins": 0.5596734881401062,
|
9563 |
+
"rewards/rejected": -0.34724941849708557,
|
9564 |
+
"step": 6360
|
9565 |
+
},
|
9566 |
+
{
|
9567 |
+
"epoch": 32.29873417721519,
|
9568 |
+
"grad_norm": 93779.41167523999,
|
9569 |
+
"learning_rate": 5.656534001880288e-08,
|
9570 |
+
"logits/chosen": 0.4058389663696289,
|
9571 |
+
"logits/rejected": 0.994676947593689,
|
9572 |
+
"logps/chosen": -21.240737915039062,
|
9573 |
+
"logps/rejected": -573.2392578125,
|
9574 |
+
"loss": 12153.6359,
|
9575 |
+
"rewards/accuracies": 0.9750000238418579,
|
9576 |
+
"rewards/chosen": 0.19539888203144073,
|
9577 |
+
"rewards/margins": 0.550510048866272,
|
9578 |
+
"rewards/rejected": -0.35511118173599243,
|
9579 |
+
"step": 6370
|
9580 |
+
},
|
9581 |
+
{
|
9582 |
+
"epoch": 32.34936708860759,
|
9583 |
+
"grad_norm": 215459.26677533987,
|
9584 |
+
"learning_rate": 5.5781886555938573e-08,
|
9585 |
+
"logits/chosen": -1.0755536556243896,
|
9586 |
+
"logits/rejected": -0.2684146761894226,
|
9587 |
+
"logps/chosen": -25.781116485595703,
|
9588 |
+
"logps/rejected": -580.9659423828125,
|
9589 |
+
"loss": 11508.8133,
|
9590 |
+
"rewards/accuracies": 0.987500011920929,
|
9591 |
+
"rewards/chosen": 0.21089033782482147,
|
9592 |
+
"rewards/margins": 0.5592586994171143,
|
9593 |
+
"rewards/rejected": -0.34836840629577637,
|
9594 |
+
"step": 6380
|
9595 |
+
},
|
9596 |
+
{
|
9597 |
+
"epoch": 32.4,
|
9598 |
+
"grad_norm": 164612.93717131627,
|
9599 |
+
"learning_rate": 5.4998433093074266e-08,
|
9600 |
+
"logits/chosen": -2.730407238006592,
|
9601 |
+
"logits/rejected": -2.2623066902160645,
|
9602 |
+
"logps/chosen": -38.27416229248047,
|
9603 |
+
"logps/rejected": -612.3323364257812,
|
9604 |
+
"loss": 10969.9328,
|
9605 |
+
"rewards/accuracies": 0.987500011920929,
|
9606 |
+
"rewards/chosen": 0.22319836914539337,
|
9607 |
+
"rewards/margins": 0.573035478591919,
|
9608 |
+
"rewards/rejected": -0.34983712434768677,
|
9609 |
+
"step": 6390
|
9610 |
+
},
|
9611 |
+
{
|
9612 |
+
"epoch": 32.450632911392404,
|
9613 |
+
"grad_norm": 140032.81053392185,
|
9614 |
+
"learning_rate": 5.421497963020996e-08,
|
9615 |
+
"logits/chosen": -0.6492301821708679,
|
9616 |
+
"logits/rejected": -0.778862476348877,
|
9617 |
+
"logps/chosen": -28.754650115966797,
|
9618 |
+
"logps/rejected": -591.8221435546875,
|
9619 |
+
"loss": 12521.7703,
|
9620 |
+
"rewards/accuracies": 1.0,
|
9621 |
+
"rewards/chosen": 0.21057042479515076,
|
9622 |
+
"rewards/margins": 0.5634862780570984,
|
9623 |
+
"rewards/rejected": -0.35291582345962524,
|
9624 |
+
"step": 6400
|
9625 |
+
},
|
9626 |
+
{
|
9627 |
+
"epoch": 32.50126582278481,
|
9628 |
+
"grad_norm": 102205.70485715618,
|
9629 |
+
"learning_rate": 5.343152616734566e-08,
|
9630 |
+
"logits/chosen": -0.9864907264709473,
|
9631 |
+
"logits/rejected": -0.19051684439182281,
|
9632 |
+
"logps/chosen": -29.4318904876709,
|
9633 |
+
"logps/rejected": -605.131103515625,
|
9634 |
+
"loss": 11591.8508,
|
9635 |
+
"rewards/accuracies": 1.0,
|
9636 |
+
"rewards/chosen": 0.2185964584350586,
|
9637 |
+
"rewards/margins": 0.579878032207489,
|
9638 |
+
"rewards/rejected": -0.3612816333770752,
|
9639 |
+
"step": 6410
|
9640 |
+
},
|
9641 |
+
{
|
9642 |
+
"epoch": 32.551898734177215,
|
9643 |
+
"grad_norm": 103047.13529668628,
|
9644 |
+
"learning_rate": 5.264807270448135e-08,
|
9645 |
+
"logits/chosen": -2.3946361541748047,
|
9646 |
+
"logits/rejected": -1.8663170337677002,
|
9647 |
+
"logps/chosen": -22.362850189208984,
|
9648 |
+
"logps/rejected": -582.4278564453125,
|
9649 |
+
"loss": 11901.1398,
|
9650 |
+
"rewards/accuracies": 1.0,
|
9651 |
+
"rewards/chosen": 0.21427400410175323,
|
9652 |
+
"rewards/margins": 0.5642385482788086,
|
9653 |
+
"rewards/rejected": -0.34996455907821655,
|
9654 |
+
"step": 6420
|
9655 |
+
},
|
9656 |
+
{
|
9657 |
+
"epoch": 32.60253164556962,
|
9658 |
+
"grad_norm": 86074.947460872,
|
9659 |
+
"learning_rate": 5.1864619241617044e-08,
|
9660 |
+
"logits/chosen": 0.2598368227481842,
|
9661 |
+
"logits/rejected": 0.16884984076023102,
|
9662 |
+
"logps/chosen": -22.76316261291504,
|
9663 |
+
"logps/rejected": -594.866455078125,
|
9664 |
+
"loss": 12333.5344,
|
9665 |
+
"rewards/accuracies": 1.0,
|
9666 |
+
"rewards/chosen": 0.2121623456478119,
|
9667 |
+
"rewards/margins": 0.5697360038757324,
|
9668 |
+
"rewards/rejected": -0.35757365822792053,
|
9669 |
+
"step": 6430
|
9670 |
+
},
|
9671 |
+
{
|
9672 |
+
"epoch": 32.653164556962025,
|
9673 |
+
"grad_norm": 137970.73954909868,
|
9674 |
+
"learning_rate": 5.108116577875274e-08,
|
9675 |
+
"logits/chosen": -0.11699090898036957,
|
9676 |
+
"logits/rejected": 0.11212899535894394,
|
9677 |
+
"logps/chosen": -29.464065551757812,
|
9678 |
+
"logps/rejected": -573.3801879882812,
|
9679 |
+
"loss": 11953.9641,
|
9680 |
+
"rewards/accuracies": 0.987500011920929,
|
9681 |
+
"rewards/chosen": 0.21537606418132782,
|
9682 |
+
"rewards/margins": 0.5438817739486694,
|
9683 |
+
"rewards/rejected": -0.3285056948661804,
|
9684 |
+
"step": 6440
|
9685 |
+
},
|
9686 |
+
{
|
9687 |
+
"epoch": 32.70379746835443,
|
9688 |
+
"grad_norm": 460796.64629538235,
|
9689 |
+
"learning_rate": 5.029771231588843e-08,
|
9690 |
+
"logits/chosen": -1.4031693935394287,
|
9691 |
+
"logits/rejected": -2.1060502529144287,
|
9692 |
+
"logps/chosen": -23.794132232666016,
|
9693 |
+
"logps/rejected": -581.7036743164062,
|
9694 |
+
"loss": 12159.9719,
|
9695 |
+
"rewards/accuracies": 0.987500011920929,
|
9696 |
+
"rewards/chosen": 0.20509609580039978,
|
9697 |
+
"rewards/margins": 0.5560418367385864,
|
9698 |
+
"rewards/rejected": -0.35094568133354187,
|
9699 |
+
"step": 6450
|
9700 |
+
},
|
9701 |
+
{
|
9702 |
+
"epoch": 32.754430379746836,
|
9703 |
+
"grad_norm": 88571.49642806537,
|
9704 |
+
"learning_rate": 4.951425885302413e-08,
|
9705 |
+
"logits/chosen": -0.29163846373558044,
|
9706 |
+
"logits/rejected": 0.15456560254096985,
|
9707 |
+
"logps/chosen": -19.800487518310547,
|
9708 |
+
"logps/rejected": -562.6231689453125,
|
9709 |
+
"loss": 11758.9578,
|
9710 |
+
"rewards/accuracies": 1.0,
|
9711 |
+
"rewards/chosen": 0.2048061192035675,
|
9712 |
+
"rewards/margins": 0.5433157682418823,
|
9713 |
+
"rewards/rejected": -0.33850961923599243,
|
9714 |
+
"step": 6460
|
9715 |
+
},
|
9716 |
+
{
|
9717 |
+
"epoch": 32.80506329113924,
|
9718 |
+
"grad_norm": 166818.40028028333,
|
9719 |
+
"learning_rate": 4.873080539015982e-08,
|
9720 |
+
"logits/chosen": 0.3278934061527252,
|
9721 |
+
"logits/rejected": 0.6011670827865601,
|
9722 |
+
"logps/chosen": -33.445350646972656,
|
9723 |
+
"logps/rejected": -590.470703125,
|
9724 |
+
"loss": 11395.1164,
|
9725 |
+
"rewards/accuracies": 0.987500011920929,
|
9726 |
+
"rewards/chosen": 0.2123481035232544,
|
9727 |
+
"rewards/margins": 0.5555016994476318,
|
9728 |
+
"rewards/rejected": -0.34315359592437744,
|
9729 |
+
"step": 6470
|
9730 |
+
},
|
9731 |
+
{
|
9732 |
+
"epoch": 32.85569620253165,
|
9733 |
+
"grad_norm": 80619.8591659213,
|
9734 |
+
"learning_rate": 4.7947351927295515e-08,
|
9735 |
+
"logits/chosen": -1.3291213512420654,
|
9736 |
+
"logits/rejected": -1.6056814193725586,
|
9737 |
+
"logps/chosen": -29.16250228881836,
|
9738 |
+
"logps/rejected": -598.3140869140625,
|
9739 |
+
"loss": 11908.6562,
|
9740 |
+
"rewards/accuracies": 1.0,
|
9741 |
+
"rewards/chosen": 0.21245749294757843,
|
9742 |
+
"rewards/margins": 0.5684391856193542,
|
9743 |
+
"rewards/rejected": -0.3559816777706146,
|
9744 |
+
"step": 6480
|
9745 |
+
},
|
9746 |
+
{
|
9747 |
+
"epoch": 32.90632911392405,
|
9748 |
+
"grad_norm": 109452.38261580766,
|
9749 |
+
"learning_rate": 4.716389846443121e-08,
|
9750 |
+
"logits/chosen": -2.2227654457092285,
|
9751 |
+
"logits/rejected": -2.1318516731262207,
|
9752 |
+
"logps/chosen": -27.57879638671875,
|
9753 |
+
"logps/rejected": -593.1817626953125,
|
9754 |
+
"loss": 11900.8148,
|
9755 |
+
"rewards/accuracies": 1.0,
|
9756 |
+
"rewards/chosen": 0.2101704627275467,
|
9757 |
+
"rewards/margins": 0.565523624420166,
|
9758 |
+
"rewards/rejected": -0.35535311698913574,
|
9759 |
+
"step": 6490
|
9760 |
+
},
|
9761 |
+
{
|
9762 |
+
"epoch": 32.95696202531646,
|
9763 |
+
"grad_norm": 146037.74057243837,
|
9764 |
+
"learning_rate": 4.63804450015669e-08,
|
9765 |
+
"logits/chosen": -0.4855597913265228,
|
9766 |
+
"logits/rejected": -0.07905157655477524,
|
9767 |
+
"logps/chosen": -32.26173782348633,
|
9768 |
+
"logps/rejected": -582.983154296875,
|
9769 |
+
"loss": 12785.9484,
|
9770 |
+
"rewards/accuracies": 0.987500011920929,
|
9771 |
+
"rewards/chosen": 0.2102789580821991,
|
9772 |
+
"rewards/margins": 0.5541440844535828,
|
9773 |
+
"rewards/rejected": -0.34386518597602844,
|
9774 |
+
"step": 6500
|
9775 |
+
},
|
9776 |
+
{
|
9777 |
+
"epoch": 33.00759493670886,
|
9778 |
+
"grad_norm": 80554.44381289573,
|
9779 |
+
"learning_rate": 4.55969915387026e-08,
|
9780 |
+
"logits/chosen": -1.16013503074646,
|
9781 |
+
"logits/rejected": -1.237755537033081,
|
9782 |
+
"logps/chosen": -22.434879302978516,
|
9783 |
+
"logps/rejected": -572.4281005859375,
|
9784 |
+
"loss": 11892.3344,
|
9785 |
+
"rewards/accuracies": 1.0,
|
9786 |
+
"rewards/chosen": 0.2072155922651291,
|
9787 |
+
"rewards/margins": 0.54491126537323,
|
9788 |
+
"rewards/rejected": -0.3376956880092621,
|
9789 |
+
"step": 6510
|
9790 |
+
},
|
9791 |
+
{
|
9792 |
+
"epoch": 33.05822784810127,
|
9793 |
+
"grad_norm": 128557.62032643631,
|
9794 |
+
"learning_rate": 4.481353807583829e-08,
|
9795 |
+
"logits/chosen": -0.2354935109615326,
|
9796 |
+
"logits/rejected": 0.728766143321991,
|
9797 |
+
"logps/chosen": -29.432445526123047,
|
9798 |
+
"logps/rejected": -585.3494262695312,
|
9799 |
+
"loss": 11835.0961,
|
9800 |
+
"rewards/accuracies": 0.9750000238418579,
|
9801 |
+
"rewards/chosen": 0.2072407454252243,
|
9802 |
+
"rewards/margins": 0.5606441497802734,
|
9803 |
+
"rewards/rejected": -0.35340338945388794,
|
9804 |
+
"step": 6520
|
9805 |
+
},
|
9806 |
+
{
|
9807 |
+
"epoch": 33.10886075949367,
|
9808 |
+
"grad_norm": 91776.99508964189,
|
9809 |
+
"learning_rate": 4.4030084612973985e-08,
|
9810 |
+
"logits/chosen": -1.175462007522583,
|
9811 |
+
"logits/rejected": -1.1933832168579102,
|
9812 |
+
"logps/chosen": -21.900630950927734,
|
9813 |
+
"logps/rejected": -574.4762573242188,
|
9814 |
+
"loss": 12157.9109,
|
9815 |
+
"rewards/accuracies": 1.0,
|
9816 |
+
"rewards/chosen": 0.20543113350868225,
|
9817 |
+
"rewards/margins": 0.5517674684524536,
|
9818 |
+
"rewards/rejected": -0.346336305141449,
|
9819 |
+
"step": 6530
|
9820 |
+
},
|
9821 |
+
{
|
9822 |
+
"epoch": 33.15949367088608,
|
9823 |
+
"grad_norm": 89893.29258028018,
|
9824 |
+
"learning_rate": 4.324663115010968e-08,
|
9825 |
+
"logits/chosen": -0.7350924015045166,
|
9826 |
+
"logits/rejected": -0.16997528076171875,
|
9827 |
+
"logps/chosen": -23.83113670349121,
|
9828 |
+
"logps/rejected": -575.5424194335938,
|
9829 |
+
"loss": 11686.9375,
|
9830 |
+
"rewards/accuracies": 0.987500011920929,
|
9831 |
+
"rewards/chosen": 0.20410069823265076,
|
9832 |
+
"rewards/margins": 0.5512816309928894,
|
9833 |
+
"rewards/rejected": -0.34718090295791626,
|
9834 |
+
"step": 6540
|
9835 |
+
},
|
9836 |
+
{
|
9837 |
+
"epoch": 33.210126582278484,
|
9838 |
+
"grad_norm": 120975.35903766478,
|
9839 |
+
"learning_rate": 4.246317768724538e-08,
|
9840 |
+
"logits/chosen": -0.08163319528102875,
|
9841 |
+
"logits/rejected": 0.07650710642337799,
|
9842 |
+
"logps/chosen": -27.332035064697266,
|
9843 |
+
"logps/rejected": -579.8117065429688,
|
9844 |
+
"loss": 11339.9297,
|
9845 |
+
"rewards/accuracies": 0.987500011920929,
|
9846 |
+
"rewards/chosen": 0.2052970826625824,
|
9847 |
+
"rewards/margins": 0.5518554449081421,
|
9848 |
+
"rewards/rejected": -0.3465583324432373,
|
9849 |
+
"step": 6550
|
9850 |
+
},
|
9851 |
+
{
|
9852 |
+
"epoch": 33.26075949367089,
|
9853 |
+
"grad_norm": 180391.18731890293,
|
9854 |
+
"learning_rate": 4.167972422438107e-08,
|
9855 |
+
"logits/chosen": -0.8266963958740234,
|
9856 |
+
"logits/rejected": 1.0672438144683838,
|
9857 |
+
"logps/chosen": -23.287370681762695,
|
9858 |
+
"logps/rejected": -572.2568969726562,
|
9859 |
+
"loss": 11743.5586,
|
9860 |
+
"rewards/accuracies": 0.987500011920929,
|
9861 |
+
"rewards/chosen": 0.19874341785907745,
|
9862 |
+
"rewards/margins": 0.5572081804275513,
|
9863 |
+
"rewards/rejected": -0.35846468806266785,
|
9864 |
+
"step": 6560
|
9865 |
+
},
|
9866 |
+
{
|
9867 |
+
"epoch": 33.311392405063295,
|
9868 |
+
"grad_norm": 84282.72341083131,
|
9869 |
+
"learning_rate": 4.0896270761516763e-08,
|
9870 |
+
"logits/chosen": -1.91861093044281,
|
9871 |
+
"logits/rejected": -1.3766604661941528,
|
9872 |
+
"logps/chosen": -24.914443969726562,
|
9873 |
+
"logps/rejected": -581.4729614257812,
|
9874 |
+
"loss": 11078.6969,
|
9875 |
+
"rewards/accuracies": 1.0,
|
9876 |
+
"rewards/chosen": 0.21130716800689697,
|
9877 |
+
"rewards/margins": 0.5577182769775391,
|
9878 |
+
"rewards/rejected": -0.34641116857528687,
|
9879 |
+
"step": 6570
|
9880 |
+
},
|
9881 |
+
{
|
9882 |
+
"epoch": 33.36202531645569,
|
9883 |
+
"grad_norm": 199903.347381946,
|
9884 |
+
"learning_rate": 4.0112817298652456e-08,
|
9885 |
+
"logits/chosen": -1.2995800971984863,
|
9886 |
+
"logits/rejected": -1.6440702676773071,
|
9887 |
+
"logps/chosen": -22.356828689575195,
|
9888 |
+
"logps/rejected": -591.6265869140625,
|
9889 |
+
"loss": 11937.0477,
|
9890 |
+
"rewards/accuracies": 1.0,
|
9891 |
+
"rewards/chosen": 0.2072306126356125,
|
9892 |
+
"rewards/margins": 0.5668342709541321,
|
9893 |
+
"rewards/rejected": -0.3596035838127136,
|
9894 |
+
"step": 6580
|
9895 |
+
},
|
9896 |
+
{
|
9897 |
+
"epoch": 33.4126582278481,
|
9898 |
+
"grad_norm": 138603.96487037002,
|
9899 |
+
"learning_rate": 3.932936383578815e-08,
|
9900 |
+
"logits/chosen": 0.8098524212837219,
|
9901 |
+
"logits/rejected": 1.2947828769683838,
|
9902 |
+
"logps/chosen": -26.31606674194336,
|
9903 |
+
"logps/rejected": -584.9072265625,
|
9904 |
+
"loss": 11177.5336,
|
9905 |
+
"rewards/accuracies": 1.0,
|
9906 |
+
"rewards/chosen": 0.20386937260627747,
|
9907 |
+
"rewards/margins": 0.5589767694473267,
|
9908 |
+
"rewards/rejected": -0.3551073968410492,
|
9909 |
+
"step": 6590
|
9910 |
+
},
|
9911 |
+
{
|
9912 |
+
"epoch": 33.4632911392405,
|
9913 |
+
"grad_norm": 123948.78500072335,
|
9914 |
+
"learning_rate": 3.854591037292385e-08,
|
9915 |
+
"logits/chosen": -2.16947603225708,
|
9916 |
+
"logits/rejected": -1.0904394388198853,
|
9917 |
+
"logps/chosen": -42.8673095703125,
|
9918 |
+
"logps/rejected": -585.2350463867188,
|
9919 |
+
"loss": 11894.6641,
|
9920 |
+
"rewards/accuracies": 1.0,
|
9921 |
+
"rewards/chosen": 0.2195717990398407,
|
9922 |
+
"rewards/margins": 0.5601873397827148,
|
9923 |
+
"rewards/rejected": -0.34061557054519653,
|
9924 |
+
"step": 6600
|
9925 |
+
},
|
9926 |
+
{
|
9927 |
+
"epoch": 33.51392405063291,
|
9928 |
+
"grad_norm": 113327.62874252205,
|
9929 |
+
"learning_rate": 3.776245691005954e-08,
|
9930 |
+
"logits/chosen": -1.375249981880188,
|
9931 |
+
"logits/rejected": -0.7785667181015015,
|
9932 |
+
"logps/chosen": -29.649211883544922,
|
9933 |
+
"logps/rejected": -602.9840698242188,
|
9934 |
+
"loss": 12210.0344,
|
9935 |
+
"rewards/accuracies": 1.0,
|
9936 |
+
"rewards/chosen": 0.22011515498161316,
|
9937 |
+
"rewards/margins": 0.5793704390525818,
|
9938 |
+
"rewards/rejected": -0.35925528407096863,
|
9939 |
+
"step": 6610
|
9940 |
+
},
|
9941 |
+
{
|
9942 |
+
"epoch": 33.564556962025314,
|
9943 |
+
"grad_norm": 79524.96422723045,
|
9944 |
+
"learning_rate": 3.6979003447195234e-08,
|
9945 |
+
"logits/chosen": -0.7508550882339478,
|
9946 |
+
"logits/rejected": -0.23799777030944824,
|
9947 |
+
"logps/chosen": -17.09669303894043,
|
9948 |
+
"logps/rejected": -572.3134155273438,
|
9949 |
+
"loss": 12138.4203,
|
9950 |
+
"rewards/accuracies": 1.0,
|
9951 |
+
"rewards/chosen": 0.2047223150730133,
|
9952 |
+
"rewards/margins": 0.5538768768310547,
|
9953 |
+
"rewards/rejected": -0.34915462136268616,
|
9954 |
+
"step": 6620
|
9955 |
+
},
|
9956 |
+
{
|
9957 |
+
"epoch": 33.61518987341772,
|
9958 |
+
"grad_norm": 80597.64263401506,
|
9959 |
+
"learning_rate": 3.619554998433093e-08,
|
9960 |
+
"logits/chosen": -1.7500404119491577,
|
9961 |
+
"logits/rejected": -1.4937622547149658,
|
9962 |
+
"logps/chosen": -24.847320556640625,
|
9963 |
+
"logps/rejected": -594.1591796875,
|
9964 |
+
"loss": 12270.6344,
|
9965 |
+
"rewards/accuracies": 1.0,
|
9966 |
+
"rewards/chosen": 0.21394848823547363,
|
9967 |
+
"rewards/margins": 0.5700836181640625,
|
9968 |
+
"rewards/rejected": -0.35613518953323364,
|
9969 |
+
"step": 6630
|
9970 |
+
},
|
9971 |
+
{
|
9972 |
+
"epoch": 33.665822784810125,
|
9973 |
+
"grad_norm": 100669.75725024722,
|
9974 |
+
"learning_rate": 3.541209652146662e-08,
|
9975 |
+
"logits/chosen": -0.4524414539337158,
|
9976 |
+
"logits/rejected": -0.5694657564163208,
|
9977 |
+
"logps/chosen": -25.72067642211914,
|
9978 |
+
"logps/rejected": -572.9901123046875,
|
9979 |
+
"loss": 11448.4047,
|
9980 |
+
"rewards/accuracies": 0.9750000238418579,
|
9981 |
+
"rewards/chosen": 0.20151250064373016,
|
9982 |
+
"rewards/margins": 0.5470980405807495,
|
9983 |
+
"rewards/rejected": -0.345585435628891,
|
9984 |
+
"step": 6640
|
9985 |
+
},
|
9986 |
+
{
|
9987 |
+
"epoch": 33.71645569620253,
|
9988 |
+
"grad_norm": 136734.1372891588,
|
9989 |
+
"learning_rate": 3.462864305860232e-08,
|
9990 |
+
"logits/chosen": -0.10392338037490845,
|
9991 |
+
"logits/rejected": 0.025324154645204544,
|
9992 |
+
"logps/chosen": -23.138744354248047,
|
9993 |
+
"logps/rejected": -578.2369995117188,
|
9994 |
+
"loss": 11719.0234,
|
9995 |
+
"rewards/accuracies": 0.9750000238418579,
|
9996 |
+
"rewards/chosen": 0.2065146416425705,
|
9997 |
+
"rewards/margins": 0.5536417365074158,
|
9998 |
+
"rewards/rejected": -0.3471270501613617,
|
9999 |
+
"step": 6650
|
10000 |
+
},
|
10001 |
+
{
|
10002 |
+
"epoch": 33.767088607594935,
|
10003 |
+
"grad_norm": 96060.1935775592,
|
10004 |
+
"learning_rate": 3.384518959573801e-08,
|
10005 |
+
"logits/chosen": -1.5298357009887695,
|
10006 |
+
"logits/rejected": -1.111659049987793,
|
10007 |
+
"logps/chosen": -36.602691650390625,
|
10008 |
+
"logps/rejected": -594.2269287109375,
|
10009 |
+
"loss": 11903.4828,
|
10010 |
+
"rewards/accuracies": 0.987500011920929,
|
10011 |
+
"rewards/chosen": 0.21466748416423798,
|
10012 |
+
"rewards/margins": 0.5613253116607666,
|
10013 |
+
"rewards/rejected": -0.34665781259536743,
|
10014 |
+
"step": 6660
|
10015 |
+
},
|
10016 |
+
{
|
10017 |
+
"epoch": 33.81772151898734,
|
10018 |
+
"grad_norm": 82308.39144839271,
|
10019 |
+
"learning_rate": 3.3061736132873705e-08,
|
10020 |
+
"logits/chosen": -1.9629747867584229,
|
10021 |
+
"logits/rejected": -1.8584734201431274,
|
10022 |
+
"logps/chosen": -17.865947723388672,
|
10023 |
+
"logps/rejected": -566.314453125,
|
10024 |
+
"loss": 12147.5891,
|
10025 |
+
"rewards/accuracies": 1.0,
|
10026 |
+
"rewards/chosen": 0.2041165828704834,
|
10027 |
+
"rewards/margins": 0.5491331219673157,
|
10028 |
+
"rewards/rejected": -0.3450164496898651,
|
10029 |
+
"step": 6670
|
10030 |
+
},
|
10031 |
+
{
|
10032 |
+
"epoch": 33.868354430379746,
|
10033 |
+
"grad_norm": 132433.76933098322,
|
10034 |
+
"learning_rate": 3.22782826700094e-08,
|
10035 |
+
"logits/chosen": -0.10643855482339859,
|
10036 |
+
"logits/rejected": 0.1565506011247635,
|
10037 |
+
"logps/chosen": -23.206607818603516,
|
10038 |
+
"logps/rejected": -565.3855590820312,
|
10039 |
+
"loss": 11928.0656,
|
10040 |
+
"rewards/accuracies": 0.9750000238418579,
|
10041 |
+
"rewards/chosen": 0.19701281189918518,
|
10042 |
+
"rewards/margins": 0.5372076630592346,
|
10043 |
+
"rewards/rejected": -0.34019485116004944,
|
10044 |
+
"step": 6680
|
10045 |
+
},
|
10046 |
+
{
|
10047 |
+
"epoch": 33.91898734177215,
|
10048 |
+
"grad_norm": 99524.21425394616,
|
10049 |
+
"learning_rate": 3.149482920714509e-08,
|
10050 |
+
"logits/chosen": 0.7746875286102295,
|
10051 |
+
"logits/rejected": 1.4906342029571533,
|
10052 |
+
"logps/chosen": -28.62857437133789,
|
10053 |
+
"logps/rejected": -569.8626708984375,
|
10054 |
+
"loss": 11616.475,
|
10055 |
+
"rewards/accuracies": 0.9750000238418579,
|
10056 |
+
"rewards/chosen": 0.20620207488536835,
|
10057 |
+
"rewards/margins": 0.548004686832428,
|
10058 |
+
"rewards/rejected": -0.3418026268482208,
|
10059 |
+
"step": 6690
|
10060 |
+
},
|
10061 |
+
{
|
10062 |
+
"epoch": 33.96962025316456,
|
10063 |
+
"grad_norm": 72753.16066899289,
|
10064 |
+
"learning_rate": 3.071137574428079e-08,
|
10065 |
+
"logits/chosen": 0.6492331624031067,
|
10066 |
+
"logits/rejected": 0.7617141604423523,
|
10067 |
+
"logps/chosen": -25.677988052368164,
|
10068 |
+
"logps/rejected": -560.1131591796875,
|
10069 |
+
"loss": 12074.9086,
|
10070 |
+
"rewards/accuracies": 0.9750000238418579,
|
10071 |
+
"rewards/chosen": 0.19937190413475037,
|
10072 |
+
"rewards/margins": 0.5361818075180054,
|
10073 |
+
"rewards/rejected": -0.33680984377861023,
|
10074 |
+
"step": 6700
|
10075 |
}
|
10076 |
],
|
10077 |
"logging_steps": 10,
|