cat-searcher
commited on
Commit
•
3cb495c
1
Parent(s):
4e1ff4b
Training in progress, epoch 16, checkpoint
Browse files- last-checkpoint/global_step3357/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3357/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3357/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3357/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3357/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3357/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3357/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3357/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3357/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step3357/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step3357/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step3357/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step3357/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step3357/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step3357/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step3357/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/model-00001-of-00002.safetensors +1 -1
- last-checkpoint/model-00002-of-00002.safetensors +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +287 -2
last-checkpoint/global_step3357/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:651dfd4d290244a1f1ed25e174745eeda53f83c1afd49bddb222769456acea8e
|
3 |
+
size 2506176112
|
last-checkpoint/global_step3357/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:31f45441c5dddfa9aebb33d35dea7081c3fd19843f93ae43f1316128bcf7ecbd
|
3 |
+
size 2506176112
|
last-checkpoint/global_step3357/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:be22b35b5e310d4823197426b1cd8771c316b9b15fbb4942ab377e3613a0ca98
|
3 |
+
size 2506176112
|
last-checkpoint/global_step3357/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:75b51b99bcd90701c6b97516470bcaab959b7e1922605d5890a31d2ba2f4d580
|
3 |
+
size 2506176112
|
last-checkpoint/global_step3357/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aec2a04e08e7e35026b691693353bc8fe5847d81fb99acf19398b7243dee628c
|
3 |
+
size 2506176112
|
last-checkpoint/global_step3357/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c13dfce78f2ac90a9157ad2f0abb21e59d1100da60be9db95d0a1f5336fc8209
|
3 |
+
size 2506176112
|
last-checkpoint/global_step3357/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f14a7f3e1501454fd040e30452a51f1a851474905f426344fd8a832d6508ec23
|
3 |
+
size 2506176112
|
last-checkpoint/global_step3357/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d72714ef0021f7b5e8d8ab77b07e3b29b0beef1ef3cb644ec965dc8a17226688
|
3 |
+
size 2506176112
|
last-checkpoint/global_step3357/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:656172ce79b5ff43fd76cd8ecb27884c941e173e847c11f888c3e98ac9533316
|
3 |
+
size 85570
|
last-checkpoint/global_step3357/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d2ef4f048ceb531e6ef19e52e0fbde839db8f8fd1d12ef45f132d6f89f4f0145
|
3 |
+
size 85506
|
last-checkpoint/global_step3357/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b68cedc957959ed2fd5ba36d094b0e6e389a7422983ed8d4c10563a6408e9b40
|
3 |
+
size 85506
|
last-checkpoint/global_step3357/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:12d1268fd1df98fe5916753b22e9b23a49b58ee4b9f4ca149196b69e198904cc
|
3 |
+
size 85506
|
last-checkpoint/global_step3357/zero_pp_rank_4_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6fc46b4ccbddfc0f7c25f583348c5d2cdb40c0392ea8cec3a0925d9d5ed56785
|
3 |
+
size 85506
|
last-checkpoint/global_step3357/zero_pp_rank_5_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cb9afb70ad87e6ce43387783fa59973a4a6904164bf83ddcccd5b7fe0b820ac9
|
3 |
+
size 85506
|
last-checkpoint/global_step3357/zero_pp_rank_6_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:162023a179a88c571b455c0ab3ea05ad7132b5fe8c517f18e503e04716909aae
|
3 |
+
size 85506
|
last-checkpoint/global_step3357/zero_pp_rank_7_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:52ee316718258920bde55c9660e06cb5c39c6343956d78d173a25f15371fb1f3
|
3 |
+
size 85506
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step3357
|
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4945242264
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3578035697ae915bf8ed319e400346be0b8f4d900849a6d07f0ff9b4c3b1711c
|
3 |
size 4945242264
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 67121608
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a652aa698a378ecc5fb4aaee9480c493c9a62c60f4f96b74c9d6698fa2aa8d33
|
3 |
size 67121608
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1a0c9979566a5d89cb3c766336548670ec6f2291deba1b7ab1764c12d3187b24
|
3 |
size 15984
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:03e36a570d6158fc25d1cf5d9f8f450fc64c5a7683330277f89ff76d5f2fc6cd
|
3 |
size 15984
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b4f619cbef4b74f1680d667c8788285a602392e63bdf3760ef3a59ec8864d483
|
3 |
size 15984
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1fc037fba93ace1bf7ce01b1a5f7d785698d47b4cc2cedf2300bbf7a41ebf05c
|
3 |
size 15984
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9ab728c2461d6d1c64f04d7cbfdfcbfa7bd7ad0ef6e19d52458501ee81b27128
|
3 |
size 15984
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:27530e653ebf5997ae3159cdcde264607e6a6f86b7e3c7a1b3a1e8301cd43d03
|
3 |
size 15984
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c1fddaeb1257697bd7c0101abf1ab23f2925d0d9165cd8bddfbd22f8444db2b7
|
3 |
size 15984
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:942af3734a320fe12a3205a47ca1cdc7d1f0996bfde86c020a35545ccd2fd418
|
3 |
size 15984
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:605054ed043f8ba321ca13100ae25afc2296eb67de83d5027f6f7f6d891a4130
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 16.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -4762,6 +4762,291 @@
|
|
4762 |
"rewards/margins": 0.5268322825431824,
|
4763 |
"rewards/rejected": -0.34266436100006104,
|
4764 |
"step": 3160
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4765 |
}
|
4766 |
],
|
4767 |
"logging_steps": 10,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 16.99746835443038,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 3357,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
4762 |
"rewards/margins": 0.5268322825431824,
|
4763 |
"rewards/rejected": -0.34266436100006104,
|
4764 |
"step": 3160
|
4765 |
+
},
|
4766 |
+
{
|
4767 |
+
"epoch": 16.050632911392405,
|
4768 |
+
"grad_norm": 256869.56003810524,
|
4769 |
+
"learning_rate": 5.8915700407395795e-08,
|
4770 |
+
"logits/chosen": -1.1983295679092407,
|
4771 |
+
"logits/rejected": -0.22695603966712952,
|
4772 |
+
"logps/chosen": -41.12403106689453,
|
4773 |
+
"logps/rejected": -573.8383178710938,
|
4774 |
+
"loss": 14636.0719,
|
4775 |
+
"rewards/accuracies": 0.987500011920929,
|
4776 |
+
"rewards/chosen": 0.1912733018398285,
|
4777 |
+
"rewards/margins": 0.5368129014968872,
|
4778 |
+
"rewards/rejected": -0.3455396294593811,
|
4779 |
+
"step": 3170
|
4780 |
+
},
|
4781 |
+
{
|
4782 |
+
"epoch": 16.10126582278481,
|
4783 |
+
"grad_norm": 251620.82775792846,
|
4784 |
+
"learning_rate": 5.734879348166719e-08,
|
4785 |
+
"logits/chosen": -0.662868082523346,
|
4786 |
+
"logits/rejected": 0.3795197606086731,
|
4787 |
+
"logps/chosen": -38.75691604614258,
|
4788 |
+
"logps/rejected": -555.0902709960938,
|
4789 |
+
"loss": 14758.6562,
|
4790 |
+
"rewards/accuracies": 0.9624999761581421,
|
4791 |
+
"rewards/chosen": 0.1840089112520218,
|
4792 |
+
"rewards/margins": 0.5191300511360168,
|
4793 |
+
"rewards/rejected": -0.335121214389801,
|
4794 |
+
"step": 3180
|
4795 |
+
},
|
4796 |
+
{
|
4797 |
+
"epoch": 16.151898734177216,
|
4798 |
+
"grad_norm": 386320.34193101624,
|
4799 |
+
"learning_rate": 5.5781886555938573e-08,
|
4800 |
+
"logits/chosen": 0.9088973999023438,
|
4801 |
+
"logits/rejected": 1.0200951099395752,
|
4802 |
+
"logps/chosen": -37.841434478759766,
|
4803 |
+
"logps/rejected": -549.9398193359375,
|
4804 |
+
"loss": 14645.3125,
|
4805 |
+
"rewards/accuracies": 0.9750000238418579,
|
4806 |
+
"rewards/chosen": 0.18419453501701355,
|
4807 |
+
"rewards/margins": 0.5178717374801636,
|
4808 |
+
"rewards/rejected": -0.3336772620677948,
|
4809 |
+
"step": 3190
|
4810 |
+
},
|
4811 |
+
{
|
4812 |
+
"epoch": 16.20253164556962,
|
4813 |
+
"grad_norm": 323738.56127307797,
|
4814 |
+
"learning_rate": 5.421497963020996e-08,
|
4815 |
+
"logits/chosen": 1.6748106479644775,
|
4816 |
+
"logits/rejected": 1.7903064489364624,
|
4817 |
+
"logps/chosen": -43.683780670166016,
|
4818 |
+
"logps/rejected": -559.7962036132812,
|
4819 |
+
"loss": 14378.5187,
|
4820 |
+
"rewards/accuracies": 0.9750000238418579,
|
4821 |
+
"rewards/chosen": 0.1856391578912735,
|
4822 |
+
"rewards/margins": 0.5182951092720032,
|
4823 |
+
"rewards/rejected": -0.3326559364795685,
|
4824 |
+
"step": 3200
|
4825 |
+
},
|
4826 |
+
{
|
4827 |
+
"epoch": 16.253164556962027,
|
4828 |
+
"grad_norm": 254204.27494940045,
|
4829 |
+
"learning_rate": 5.264807270448135e-08,
|
4830 |
+
"logits/chosen": -0.028285836800932884,
|
4831 |
+
"logits/rejected": 0.47511911392211914,
|
4832 |
+
"logps/chosen": -46.74934005737305,
|
4833 |
+
"logps/rejected": -582.1607666015625,
|
4834 |
+
"loss": 14203.1469,
|
4835 |
+
"rewards/accuracies": 0.987500011920929,
|
4836 |
+
"rewards/chosen": 0.19257526099681854,
|
4837 |
+
"rewards/margins": 0.5342021584510803,
|
4838 |
+
"rewards/rejected": -0.3416268825531006,
|
4839 |
+
"step": 3210
|
4840 |
+
},
|
4841 |
+
{
|
4842 |
+
"epoch": 16.303797468354432,
|
4843 |
+
"grad_norm": 295536.9430947363,
|
4844 |
+
"learning_rate": 5.108116577875274e-08,
|
4845 |
+
"logits/chosen": 0.9740939140319824,
|
4846 |
+
"logits/rejected": 0.8530548810958862,
|
4847 |
+
"logps/chosen": -43.95893478393555,
|
4848 |
+
"logps/rejected": -566.3425903320312,
|
4849 |
+
"loss": 14617.1531,
|
4850 |
+
"rewards/accuracies": 1.0,
|
4851 |
+
"rewards/chosen": 0.18452490866184235,
|
4852 |
+
"rewards/margins": 0.5231844782829285,
|
4853 |
+
"rewards/rejected": -0.3386596143245697,
|
4854 |
+
"step": 3220
|
4855 |
+
},
|
4856 |
+
{
|
4857 |
+
"epoch": 16.354430379746834,
|
4858 |
+
"grad_norm": 228442.89270088554,
|
4859 |
+
"learning_rate": 4.951425885302413e-08,
|
4860 |
+
"logits/chosen": -0.6641544699668884,
|
4861 |
+
"logits/rejected": -0.42437514662742615,
|
4862 |
+
"logps/chosen": -42.97655487060547,
|
4863 |
+
"logps/rejected": -572.6472778320312,
|
4864 |
+
"loss": 14575.375,
|
4865 |
+
"rewards/accuracies": 0.987500011920929,
|
4866 |
+
"rewards/chosen": 0.19143202900886536,
|
4867 |
+
"rewards/margins": 0.5323026776313782,
|
4868 |
+
"rewards/rejected": -0.34087061882019043,
|
4869 |
+
"step": 3230
|
4870 |
+
},
|
4871 |
+
{
|
4872 |
+
"epoch": 16.40506329113924,
|
4873 |
+
"grad_norm": 280822.1227003712,
|
4874 |
+
"learning_rate": 4.7947351927295515e-08,
|
4875 |
+
"logits/chosen": 1.1500619649887085,
|
4876 |
+
"logits/rejected": 1.5377223491668701,
|
4877 |
+
"logps/chosen": -40.756866455078125,
|
4878 |
+
"logps/rejected": -555.7669067382812,
|
4879 |
+
"loss": 14355.8438,
|
4880 |
+
"rewards/accuracies": 1.0,
|
4881 |
+
"rewards/chosen": 0.18818344175815582,
|
4882 |
+
"rewards/margins": 0.5185222029685974,
|
4883 |
+
"rewards/rejected": -0.3303387761116028,
|
4884 |
+
"step": 3240
|
4885 |
+
},
|
4886 |
+
{
|
4887 |
+
"epoch": 16.455696202531644,
|
4888 |
+
"grad_norm": 211726.7404787661,
|
4889 |
+
"learning_rate": 4.63804450015669e-08,
|
4890 |
+
"logits/chosen": -0.1092449203133583,
|
4891 |
+
"logits/rejected": 0.2951999306678772,
|
4892 |
+
"logps/chosen": -42.441200256347656,
|
4893 |
+
"logps/rejected": -545.1079711914062,
|
4894 |
+
"loss": 14375.5266,
|
4895 |
+
"rewards/accuracies": 0.987500011920929,
|
4896 |
+
"rewards/chosen": 0.18679597973823547,
|
4897 |
+
"rewards/margins": 0.5060458779335022,
|
4898 |
+
"rewards/rejected": -0.31924980878829956,
|
4899 |
+
"step": 3250
|
4900 |
+
},
|
4901 |
+
{
|
4902 |
+
"epoch": 16.50632911392405,
|
4903 |
+
"grad_norm": 356888.551437776,
|
4904 |
+
"learning_rate": 4.481353807583829e-08,
|
4905 |
+
"logits/chosen": -1.3785438537597656,
|
4906 |
+
"logits/rejected": -1.0880242586135864,
|
4907 |
+
"logps/chosen": -54.5753288269043,
|
4908 |
+
"logps/rejected": -585.0982666015625,
|
4909 |
+
"loss": 13676.1484,
|
4910 |
+
"rewards/accuracies": 1.0,
|
4911 |
+
"rewards/chosen": 0.19741004705429077,
|
4912 |
+
"rewards/margins": 0.5335227251052856,
|
4913 |
+
"rewards/rejected": -0.3361126780509949,
|
4914 |
+
"step": 3260
|
4915 |
+
},
|
4916 |
+
{
|
4917 |
+
"epoch": 16.556962025316455,
|
4918 |
+
"grad_norm": 364581.3025715214,
|
4919 |
+
"learning_rate": 4.324663115010968e-08,
|
4920 |
+
"logits/chosen": -0.7049742341041565,
|
4921 |
+
"logits/rejected": -0.23324167728424072,
|
4922 |
+
"logps/chosen": -51.56848907470703,
|
4923 |
+
"logps/rejected": -578.4015502929688,
|
4924 |
+
"loss": 14484.6266,
|
4925 |
+
"rewards/accuracies": 0.987500011920929,
|
4926 |
+
"rewards/chosen": 0.18512576818466187,
|
4927 |
+
"rewards/margins": 0.5236076712608337,
|
4928 |
+
"rewards/rejected": -0.3384818732738495,
|
4929 |
+
"step": 3270
|
4930 |
+
},
|
4931 |
+
{
|
4932 |
+
"epoch": 16.60759493670886,
|
4933 |
+
"grad_norm": 336864.8330615521,
|
4934 |
+
"learning_rate": 4.167972422438107e-08,
|
4935 |
+
"logits/chosen": -0.9721381068229675,
|
4936 |
+
"logits/rejected": -1.1028145551681519,
|
4937 |
+
"logps/chosen": -55.94579315185547,
|
4938 |
+
"logps/rejected": -583.2372436523438,
|
4939 |
+
"loss": 14945.2641,
|
4940 |
+
"rewards/accuracies": 0.9750000238418579,
|
4941 |
+
"rewards/chosen": 0.19380484521389008,
|
4942 |
+
"rewards/margins": 0.5321142673492432,
|
4943 |
+
"rewards/rejected": -0.3383094370365143,
|
4944 |
+
"step": 3280
|
4945 |
+
},
|
4946 |
+
{
|
4947 |
+
"epoch": 16.658227848101266,
|
4948 |
+
"grad_norm": 310564.956837095,
|
4949 |
+
"learning_rate": 4.0112817298652456e-08,
|
4950 |
+
"logits/chosen": -0.6065518260002136,
|
4951 |
+
"logits/rejected": -0.21473164856433868,
|
4952 |
+
"logps/chosen": -46.307228088378906,
|
4953 |
+
"logps/rejected": -586.7664184570312,
|
4954 |
+
"loss": 14667.4531,
|
4955 |
+
"rewards/accuracies": 0.987500011920929,
|
4956 |
+
"rewards/chosen": 0.19551894068717957,
|
4957 |
+
"rewards/margins": 0.5414855480194092,
|
4958 |
+
"rewards/rejected": -0.345966637134552,
|
4959 |
+
"step": 3290
|
4960 |
+
},
|
4961 |
+
{
|
4962 |
+
"epoch": 16.70886075949367,
|
4963 |
+
"grad_norm": 329301.5108160766,
|
4964 |
+
"learning_rate": 3.854591037292385e-08,
|
4965 |
+
"logits/chosen": 0.40292587876319885,
|
4966 |
+
"logits/rejected": 1.5396214723587036,
|
4967 |
+
"logps/chosen": -40.793739318847656,
|
4968 |
+
"logps/rejected": -570.8857421875,
|
4969 |
+
"loss": 14524.3094,
|
4970 |
+
"rewards/accuracies": 0.987500011920929,
|
4971 |
+
"rewards/chosen": 0.18828611075878143,
|
4972 |
+
"rewards/margins": 0.5341116189956665,
|
4973 |
+
"rewards/rejected": -0.3458254337310791,
|
4974 |
+
"step": 3300
|
4975 |
+
},
|
4976 |
+
{
|
4977 |
+
"epoch": 16.759493670886076,
|
4978 |
+
"grad_norm": 389871.220870713,
|
4979 |
+
"learning_rate": 3.6979003447195234e-08,
|
4980 |
+
"logits/chosen": -0.2180454283952713,
|
4981 |
+
"logits/rejected": 0.63756263256073,
|
4982 |
+
"logps/chosen": -48.842628479003906,
|
4983 |
+
"logps/rejected": -596.3530883789062,
|
4984 |
+
"loss": 15026.0328,
|
4985 |
+
"rewards/accuracies": 1.0,
|
4986 |
+
"rewards/chosen": 0.19478780031204224,
|
4987 |
+
"rewards/margins": 0.5423206090927124,
|
4988 |
+
"rewards/rejected": -0.34753280878067017,
|
4989 |
+
"step": 3310
|
4990 |
+
},
|
4991 |
+
{
|
4992 |
+
"epoch": 16.810126582278482,
|
4993 |
+
"grad_norm": 297091.2945334893,
|
4994 |
+
"learning_rate": 3.541209652146662e-08,
|
4995 |
+
"logits/chosen": -0.4556306302547455,
|
4996 |
+
"logits/rejected": 0.1757240742444992,
|
4997 |
+
"logps/chosen": -52.64439010620117,
|
4998 |
+
"logps/rejected": -598.89990234375,
|
4999 |
+
"loss": 14405.2531,
|
5000 |
+
"rewards/accuracies": 0.987500011920929,
|
5001 |
+
"rewards/chosen": 0.19450917840003967,
|
5002 |
+
"rewards/margins": 0.5458864569664001,
|
5003 |
+
"rewards/rejected": -0.3513772487640381,
|
5004 |
+
"step": 3320
|
5005 |
+
},
|
5006 |
+
{
|
5007 |
+
"epoch": 16.860759493670887,
|
5008 |
+
"grad_norm": 1094427.122685082,
|
5009 |
+
"learning_rate": 3.384518959573801e-08,
|
5010 |
+
"logits/chosen": -0.09430136531591415,
|
5011 |
+
"logits/rejected": 0.669711709022522,
|
5012 |
+
"logps/chosen": -48.170013427734375,
|
5013 |
+
"logps/rejected": -584.9744873046875,
|
5014 |
+
"loss": 15005.1063,
|
5015 |
+
"rewards/accuracies": 0.987500011920929,
|
5016 |
+
"rewards/chosen": 0.1912693828344345,
|
5017 |
+
"rewards/margins": 0.5353102087974548,
|
5018 |
+
"rewards/rejected": -0.34404081106185913,
|
5019 |
+
"step": 3330
|
5020 |
+
},
|
5021 |
+
{
|
5022 |
+
"epoch": 16.911392405063292,
|
5023 |
+
"grad_norm": 266675.6307359935,
|
5024 |
+
"learning_rate": 3.22782826700094e-08,
|
5025 |
+
"logits/chosen": -0.09551366418600082,
|
5026 |
+
"logits/rejected": -0.07008041441440582,
|
5027 |
+
"logps/chosen": -36.88441848754883,
|
5028 |
+
"logps/rejected": -568.5509033203125,
|
5029 |
+
"loss": 13823.6516,
|
5030 |
+
"rewards/accuracies": 1.0,
|
5031 |
+
"rewards/chosen": 0.18999743461608887,
|
5032 |
+
"rewards/margins": 0.5339778661727905,
|
5033 |
+
"rewards/rejected": -0.34398046135902405,
|
5034 |
+
"step": 3340
|
5035 |
+
},
|
5036 |
+
{
|
5037 |
+
"epoch": 16.962025316455698,
|
5038 |
+
"grad_norm": 562034.347414135,
|
5039 |
+
"learning_rate": 3.071137574428079e-08,
|
5040 |
+
"logits/chosen": 0.6763383746147156,
|
5041 |
+
"logits/rejected": 0.4948856830596924,
|
5042 |
+
"logps/chosen": -46.25956726074219,
|
5043 |
+
"logps/rejected": -565.7184448242188,
|
5044 |
+
"loss": 14414.3859,
|
5045 |
+
"rewards/accuracies": 0.9750000238418579,
|
5046 |
+
"rewards/chosen": 0.18556642532348633,
|
5047 |
+
"rewards/margins": 0.5159622430801392,
|
5048 |
+
"rewards/rejected": -0.33039581775665283,
|
5049 |
+
"step": 3350
|
5050 |
}
|
5051 |
],
|
5052 |
"logging_steps": 10,
|