cat-searcher
commited on
Commit
•
4afe893
1
Parent(s):
28df2c3
Training in progress, epoch 14, checkpoint
Browse files- last-checkpoint/global_step2765/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2765/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2765/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2765/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2765/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2765/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2765/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2765/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2765/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step2765/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step2765/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step2765/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step2765/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step2765/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step2765/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step2765/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/model-00001-of-00002.safetensors +1 -1
- last-checkpoint/model-00002-of-00002.safetensors +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +302 -2
last-checkpoint/global_step2765/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5cc6bf9b97a94b90699f45a7fc38d202fb177244c38dbfa6f36c6271cda5ac98
|
3 |
+
size 2506176112
|
last-checkpoint/global_step2765/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2eeef3e3616d377f90d12ea86346472117cc77ecfbe03a66d411aa1ddd074309
|
3 |
+
size 2506176112
|
last-checkpoint/global_step2765/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:28412a24f982a84a28a802cdb61b19b296e9b7c9f9c89785ac5f1eee4a5f6d68
|
3 |
+
size 2506176112
|
last-checkpoint/global_step2765/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:040ce20af60dd7da7e3b33695723be16e615aabf38e4ccffc60ef74d46370115
|
3 |
+
size 2506176112
|
last-checkpoint/global_step2765/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c1c3657eea69211a07303c22ef8ed8263791c3ca1ed2c1bd76f7759df03df7b0
|
3 |
+
size 2506176112
|
last-checkpoint/global_step2765/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:825697483c0d8dbaacff033646860487bed6d0841aea89b0ebb37c5e044e33f3
|
3 |
+
size 2506176112
|
last-checkpoint/global_step2765/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:07e3a889dd2c577204f326a307c88e12d2fd0036af5c448e2668e5c2344e60b9
|
3 |
+
size 2506176112
|
last-checkpoint/global_step2765/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:665538142db7bdae0da78f519f68de74bf856090f5be14a5db2ac232dac90866
|
3 |
+
size 2506176112
|
last-checkpoint/global_step2765/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:991c4af0233056577c2d759b974541f3192f0cd0dc222e13ee0454f7746d456a
|
3 |
+
size 85570
|
last-checkpoint/global_step2765/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:560aa971ff7c1f9452683a8ea181cecc727c47c0c053ad3c99c5dc4a3009323c
|
3 |
+
size 85506
|
last-checkpoint/global_step2765/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d4b35f8a1cbb50722c530745fc7e51d9d5bd879c6ae1bf2914faf9e7575c5ad3
|
3 |
+
size 85506
|
last-checkpoint/global_step2765/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9f0cd13d71b6e139ffda808e5162d78171b468da3bd5f30c830aa64f68d9a908
|
3 |
+
size 85506
|
last-checkpoint/global_step2765/zero_pp_rank_4_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6a722a8dbc5fc64619032a8f6e56ed05ef49c8507a36d3d4c5add2d009be8d1d
|
3 |
+
size 85506
|
last-checkpoint/global_step2765/zero_pp_rank_5_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:188f0299e77dc75c4f4dc4a7610a68931ae3788b068f59ce3cd1b428d784e9d8
|
3 |
+
size 85506
|
last-checkpoint/global_step2765/zero_pp_rank_6_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5f7e7d8e3c13d10a4e9c3c7444405fec9aedb115d49a24baf47b10f0bd9a9275
|
3 |
+
size 85506
|
last-checkpoint/global_step2765/zero_pp_rank_7_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d49eeefae15968f8ea4dcaca071892d22ccb38c4cb923ca8c9bb007d471075be
|
3 |
+
size 85506
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step2765
|
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4945242264
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:51554b1b141cb737fe919c7a7d90b054dc54778551653b359782ad641e140b5d
|
3 |
size 4945242264
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 67121608
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:39ccf095d82d77e6ad3b265d9fdc49a86ab3fad18daa92e729265d0f5f3bbb71
|
3 |
size 67121608
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7891ffa7c7dae99113aa986d67278b52b8c57db55001dc3547a61f24569a34ee
|
3 |
size 15984
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a8b92875cb04deec367605433847d1bda444b178b643d2da7ed9aaf738d232b4
|
3 |
size 15984
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e9f5f3338a05e325b5408a1cd0b6f5e5b10fad05fe479d63f44bec4cf18107d6
|
3 |
size 15984
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1be749fea477a3867d44010631937e0d8f071ca5f9614f9795c92c7fa68833a6
|
3 |
size 15984
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cbc4a5ea4532c621f4c8e9891117b2e597a7f005001e8b4f2a1b4da8c82bf964
|
3 |
size 15984
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:480f9fe7dd71b54d915b46162e34b780ba2467d5542115cc809dbca60b394c0e
|
3 |
size 15984
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c11d982dcd813e82c2d97a5491ce9624cff2dd22e8655ea617ccef1fc1474470
|
3 |
size 15984
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:73494fac3a001cba7cedd097b97f028d4c1d136ee6709214b0a7fe305e5b9089
|
3 |
size 15984
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:136693ea21ecf0b59fde813d184b14a037ef0bca92ae910a6f73169e6198ccb5
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -3862,6 +3862,306 @@
|
|
3862 |
"rewards/margins": 0.5234028100967407,
|
3863 |
"rewards/rejected": -0.3366047739982605,
|
3864 |
"step": 2560
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3865 |
}
|
3866 |
],
|
3867 |
"logging_steps": 10,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 14.0,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 2765,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
3862 |
"rewards/margins": 0.5234028100967407,
|
3863 |
"rewards/rejected": -0.3366047739982605,
|
3864 |
"step": 2560
|
3865 |
+
},
|
3866 |
+
{
|
3867 |
+
"epoch": 13.012658227848101,
|
3868 |
+
"grad_norm": 335161.21326055715,
|
3869 |
+
"learning_rate": 1.529301159511125e-07,
|
3870 |
+
"logits/chosen": 0.09210095554590225,
|
3871 |
+
"logits/rejected": 0.2885093688964844,
|
3872 |
+
"logps/chosen": -52.608367919921875,
|
3873 |
+
"logps/rejected": -558.9227294921875,
|
3874 |
+
"loss": 15959.725,
|
3875 |
+
"rewards/accuracies": 0.9750000238418579,
|
3876 |
+
"rewards/chosen": 0.1778368204832077,
|
3877 |
+
"rewards/margins": 0.5030940175056458,
|
3878 |
+
"rewards/rejected": -0.32525718212127686,
|
3879 |
+
"step": 2570
|
3880 |
+
},
|
3881 |
+
{
|
3882 |
+
"epoch": 13.063291139240507,
|
3883 |
+
"grad_norm": 771775.1017807113,
|
3884 |
+
"learning_rate": 1.5136320902538388e-07,
|
3885 |
+
"logits/chosen": -1.3265520334243774,
|
3886 |
+
"logits/rejected": -0.9296306371688843,
|
3887 |
+
"logps/chosen": -62.875038146972656,
|
3888 |
+
"logps/rejected": -560.3228759765625,
|
3889 |
+
"loss": 15567.6344,
|
3890 |
+
"rewards/accuracies": 0.987500011920929,
|
3891 |
+
"rewards/chosen": 0.18662917613983154,
|
3892 |
+
"rewards/margins": 0.49883994460105896,
|
3893 |
+
"rewards/rejected": -0.31221073865890503,
|
3894 |
+
"step": 2580
|
3895 |
+
},
|
3896 |
+
{
|
3897 |
+
"epoch": 13.113924050632912,
|
3898 |
+
"grad_norm": 446168.3148918395,
|
3899 |
+
"learning_rate": 1.4979630209965526e-07,
|
3900 |
+
"logits/chosen": -0.11115183681249619,
|
3901 |
+
"logits/rejected": 0.8431870341300964,
|
3902 |
+
"logps/chosen": -46.82927703857422,
|
3903 |
+
"logps/rejected": -552.5628051757812,
|
3904 |
+
"loss": 16255.3438,
|
3905 |
+
"rewards/accuracies": 1.0,
|
3906 |
+
"rewards/chosen": 0.17291709780693054,
|
3907 |
+
"rewards/margins": 0.5057471990585327,
|
3908 |
+
"rewards/rejected": -0.3328301012516022,
|
3909 |
+
"step": 2590
|
3910 |
+
},
|
3911 |
+
{
|
3912 |
+
"epoch": 13.164556962025316,
|
3913 |
+
"grad_norm": 586122.4453174556,
|
3914 |
+
"learning_rate": 1.4822939517392665e-07,
|
3915 |
+
"logits/chosen": -0.757349967956543,
|
3916 |
+
"logits/rejected": 0.037270687520504,
|
3917 |
+
"logps/chosen": -55.21142578125,
|
3918 |
+
"logps/rejected": -557.4276123046875,
|
3919 |
+
"loss": 16720.8172,
|
3920 |
+
"rewards/accuracies": 0.9750000238418579,
|
3921 |
+
"rewards/chosen": 0.1850253939628601,
|
3922 |
+
"rewards/margins": 0.5111584663391113,
|
3923 |
+
"rewards/rejected": -0.32613304257392883,
|
3924 |
+
"step": 2600
|
3925 |
+
},
|
3926 |
+
{
|
3927 |
+
"epoch": 13.215189873417721,
|
3928 |
+
"grad_norm": 420628.2693101698,
|
3929 |
+
"learning_rate": 1.4666248824819803e-07,
|
3930 |
+
"logits/chosen": -0.11379202455282211,
|
3931 |
+
"logits/rejected": -0.11788152158260345,
|
3932 |
+
"logps/chosen": -49.00257110595703,
|
3933 |
+
"logps/rejected": -576.3326416015625,
|
3934 |
+
"loss": 16306.0688,
|
3935 |
+
"rewards/accuracies": 1.0,
|
3936 |
+
"rewards/chosen": 0.18590961396694183,
|
3937 |
+
"rewards/margins": 0.5257736444473267,
|
3938 |
+
"rewards/rejected": -0.33986401557922363,
|
3939 |
+
"step": 2610
|
3940 |
+
},
|
3941 |
+
{
|
3942 |
+
"epoch": 13.265822784810126,
|
3943 |
+
"grad_norm": 436219.2086299041,
|
3944 |
+
"learning_rate": 1.4509558132246945e-07,
|
3945 |
+
"logits/chosen": -0.7918820977210999,
|
3946 |
+
"logits/rejected": -0.14419230818748474,
|
3947 |
+
"logps/chosen": -56.56486892700195,
|
3948 |
+
"logps/rejected": -584.7669677734375,
|
3949 |
+
"loss": 16369.2719,
|
3950 |
+
"rewards/accuracies": 1.0,
|
3951 |
+
"rewards/chosen": 0.18918678164482117,
|
3952 |
+
"rewards/margins": 0.5305701494216919,
|
3953 |
+
"rewards/rejected": -0.3413834273815155,
|
3954 |
+
"step": 2620
|
3955 |
+
},
|
3956 |
+
{
|
3957 |
+
"epoch": 13.316455696202532,
|
3958 |
+
"grad_norm": 596793.3073449759,
|
3959 |
+
"learning_rate": 1.4352867439674083e-07,
|
3960 |
+
"logits/chosen": 1.9564087390899658,
|
3961 |
+
"logits/rejected": 2.246692180633545,
|
3962 |
+
"logps/chosen": -51.851722717285156,
|
3963 |
+
"logps/rejected": -548.3530883789062,
|
3964 |
+
"loss": 16796.1063,
|
3965 |
+
"rewards/accuracies": 0.987500011920929,
|
3966 |
+
"rewards/chosen": 0.18290123343467712,
|
3967 |
+
"rewards/margins": 0.4980129599571228,
|
3968 |
+
"rewards/rejected": -0.3151116371154785,
|
3969 |
+
"step": 2630
|
3970 |
+
},
|
3971 |
+
{
|
3972 |
+
"epoch": 13.367088607594937,
|
3973 |
+
"grad_norm": 474733.1664905385,
|
3974 |
+
"learning_rate": 1.4196176747101222e-07,
|
3975 |
+
"logits/chosen": 0.530455470085144,
|
3976 |
+
"logits/rejected": 0.14751790463924408,
|
3977 |
+
"logps/chosen": -48.55830001831055,
|
3978 |
+
"logps/rejected": -558.3150024414062,
|
3979 |
+
"loss": 16144.2906,
|
3980 |
+
"rewards/accuracies": 0.987500011920929,
|
3981 |
+
"rewards/chosen": 0.17938682436943054,
|
3982 |
+
"rewards/margins": 0.5066471695899963,
|
3983 |
+
"rewards/rejected": -0.3272603154182434,
|
3984 |
+
"step": 2640
|
3985 |
+
},
|
3986 |
+
{
|
3987 |
+
"epoch": 13.417721518987342,
|
3988 |
+
"grad_norm": 1649837.8712191964,
|
3989 |
+
"learning_rate": 1.403948605452836e-07,
|
3990 |
+
"logits/chosen": -0.03671743720769882,
|
3991 |
+
"logits/rejected": 0.7579118013381958,
|
3992 |
+
"logps/chosen": -42.065242767333984,
|
3993 |
+
"logps/rejected": -554.230224609375,
|
3994 |
+
"loss": 16118.8047,
|
3995 |
+
"rewards/accuracies": 1.0,
|
3996 |
+
"rewards/chosen": 0.18058671057224274,
|
3997 |
+
"rewards/margins": 0.5129930377006531,
|
3998 |
+
"rewards/rejected": -0.3324064016342163,
|
3999 |
+
"step": 2650
|
4000 |
+
},
|
4001 |
+
{
|
4002 |
+
"epoch": 13.468354430379748,
|
4003 |
+
"grad_norm": 594890.10809389,
|
4004 |
+
"learning_rate": 1.38827953619555e-07,
|
4005 |
+
"logits/chosen": 0.288557231426239,
|
4006 |
+
"logits/rejected": 0.2958771288394928,
|
4007 |
+
"logps/chosen": -52.33495330810547,
|
4008 |
+
"logps/rejected": -561.2686157226562,
|
4009 |
+
"loss": 15733.7453,
|
4010 |
+
"rewards/accuracies": 0.987500011920929,
|
4011 |
+
"rewards/chosen": 0.1808079034090042,
|
4012 |
+
"rewards/margins": 0.5136345028877258,
|
4013 |
+
"rewards/rejected": -0.3328266143798828,
|
4014 |
+
"step": 2660
|
4015 |
+
},
|
4016 |
+
{
|
4017 |
+
"epoch": 13.518987341772151,
|
4018 |
+
"grad_norm": 467820.0894028926,
|
4019 |
+
"learning_rate": 1.3726104669382637e-07,
|
4020 |
+
"logits/chosen": -0.39889806509017944,
|
4021 |
+
"logits/rejected": 0.02098376676440239,
|
4022 |
+
"logps/chosen": -53.63391876220703,
|
4023 |
+
"logps/rejected": -556.4556884765625,
|
4024 |
+
"loss": 15584.0406,
|
4025 |
+
"rewards/accuracies": 0.9750000238418579,
|
4026 |
+
"rewards/chosen": 0.18383657932281494,
|
4027 |
+
"rewards/margins": 0.5030336976051331,
|
4028 |
+
"rewards/rejected": -0.3191971182823181,
|
4029 |
+
"step": 2670
|
4030 |
+
},
|
4031 |
+
{
|
4032 |
+
"epoch": 13.569620253164556,
|
4033 |
+
"grad_norm": 349641.6736805019,
|
4034 |
+
"learning_rate": 1.3569413976809776e-07,
|
4035 |
+
"logits/chosen": -1.0416258573532104,
|
4036 |
+
"logits/rejected": -0.687407374382019,
|
4037 |
+
"logps/chosen": -40.50030517578125,
|
4038 |
+
"logps/rejected": -560.5548706054688,
|
4039 |
+
"loss": 15275.5312,
|
4040 |
+
"rewards/accuracies": 1.0,
|
4041 |
+
"rewards/chosen": 0.18312379717826843,
|
4042 |
+
"rewards/margins": 0.5221952199935913,
|
4043 |
+
"rewards/rejected": -0.33907145261764526,
|
4044 |
+
"step": 2680
|
4045 |
+
},
|
4046 |
+
{
|
4047 |
+
"epoch": 13.620253164556962,
|
4048 |
+
"grad_norm": 769040.8085386351,
|
4049 |
+
"learning_rate": 1.3412723284236915e-07,
|
4050 |
+
"logits/chosen": 1.7483727931976318,
|
4051 |
+
"logits/rejected": 2.3238413333892822,
|
4052 |
+
"logps/chosen": -49.73235321044922,
|
4053 |
+
"logps/rejected": -559.8514404296875,
|
4054 |
+
"loss": 16850.175,
|
4055 |
+
"rewards/accuracies": 1.0,
|
4056 |
+
"rewards/chosen": 0.18260039389133453,
|
4057 |
+
"rewards/margins": 0.5106431245803833,
|
4058 |
+
"rewards/rejected": -0.3280427157878876,
|
4059 |
+
"step": 2690
|
4060 |
+
},
|
4061 |
+
{
|
4062 |
+
"epoch": 13.670886075949367,
|
4063 |
+
"grad_norm": 459226.17158416886,
|
4064 |
+
"learning_rate": 1.3256032591664053e-07,
|
4065 |
+
"logits/chosen": -0.2809019684791565,
|
4066 |
+
"logits/rejected": 0.43121522665023804,
|
4067 |
+
"logps/chosen": -58.69781494140625,
|
4068 |
+
"logps/rejected": -588.9169921875,
|
4069 |
+
"loss": 15404.6109,
|
4070 |
+
"rewards/accuracies": 0.987500011920929,
|
4071 |
+
"rewards/chosen": 0.19193768501281738,
|
4072 |
+
"rewards/margins": 0.5343278646469116,
|
4073 |
+
"rewards/rejected": -0.34239014983177185,
|
4074 |
+
"step": 2700
|
4075 |
+
},
|
4076 |
+
{
|
4077 |
+
"epoch": 13.721518987341772,
|
4078 |
+
"grad_norm": 339517.3364374988,
|
4079 |
+
"learning_rate": 1.3099341899091192e-07,
|
4080 |
+
"logits/chosen": 0.3717317283153534,
|
4081 |
+
"logits/rejected": 0.5634896159172058,
|
4082 |
+
"logps/chosen": -60.52980422973633,
|
4083 |
+
"logps/rejected": -555.2349243164062,
|
4084 |
+
"loss": 15341.8219,
|
4085 |
+
"rewards/accuracies": 0.9375,
|
4086 |
+
"rewards/chosen": 0.17079493403434753,
|
4087 |
+
"rewards/margins": 0.489946186542511,
|
4088 |
+
"rewards/rejected": -0.31915122270584106,
|
4089 |
+
"step": 2710
|
4090 |
+
},
|
4091 |
+
{
|
4092 |
+
"epoch": 13.772151898734178,
|
4093 |
+
"grad_norm": 1157921.1375110236,
|
4094 |
+
"learning_rate": 1.2942651206518333e-07,
|
4095 |
+
"logits/chosen": -1.758825659751892,
|
4096 |
+
"logits/rejected": -1.0223956108093262,
|
4097 |
+
"logps/chosen": -48.61360549926758,
|
4098 |
+
"logps/rejected": -562.5768432617188,
|
4099 |
+
"loss": 16196.7625,
|
4100 |
+
"rewards/accuracies": 0.987500011920929,
|
4101 |
+
"rewards/chosen": 0.180302232503891,
|
4102 |
+
"rewards/margins": 0.5197224020957947,
|
4103 |
+
"rewards/rejected": -0.3394201397895813,
|
4104 |
+
"step": 2720
|
4105 |
+
},
|
4106 |
+
{
|
4107 |
+
"epoch": 13.822784810126583,
|
4108 |
+
"grad_norm": 434777.104877517,
|
4109 |
+
"learning_rate": 1.2785960513945471e-07,
|
4110 |
+
"logits/chosen": -0.3282082676887512,
|
4111 |
+
"logits/rejected": 0.4013535976409912,
|
4112 |
+
"logps/chosen": -50.629215240478516,
|
4113 |
+
"logps/rejected": -582.4617309570312,
|
4114 |
+
"loss": 15710.8641,
|
4115 |
+
"rewards/accuracies": 1.0,
|
4116 |
+
"rewards/chosen": 0.18200094997882843,
|
4117 |
+
"rewards/margins": 0.5299168825149536,
|
4118 |
+
"rewards/rejected": -0.3479159474372864,
|
4119 |
+
"step": 2730
|
4120 |
+
},
|
4121 |
+
{
|
4122 |
+
"epoch": 13.873417721518987,
|
4123 |
+
"grad_norm": 677123.1021845904,
|
4124 |
+
"learning_rate": 1.262926982137261e-07,
|
4125 |
+
"logits/chosen": -0.9533359408378601,
|
4126 |
+
"logits/rejected": -0.11374642699956894,
|
4127 |
+
"logps/chosen": -50.710845947265625,
|
4128 |
+
"logps/rejected": -568.776611328125,
|
4129 |
+
"loss": 16490.0469,
|
4130 |
+
"rewards/accuracies": 1.0,
|
4131 |
+
"rewards/chosen": 0.18456825613975525,
|
4132 |
+
"rewards/margins": 0.5208636522293091,
|
4133 |
+
"rewards/rejected": -0.3362954258918762,
|
4134 |
+
"step": 2740
|
4135 |
+
},
|
4136 |
+
{
|
4137 |
+
"epoch": 13.924050632911392,
|
4138 |
+
"grad_norm": 608241.5399016802,
|
4139 |
+
"learning_rate": 1.2472579128799749e-07,
|
4140 |
+
"logits/chosen": -0.009487760253250599,
|
4141 |
+
"logits/rejected": 0.5674014091491699,
|
4142 |
+
"logps/chosen": -47.34721755981445,
|
4143 |
+
"logps/rejected": -558.3707275390625,
|
4144 |
+
"loss": 16114.125,
|
4145 |
+
"rewards/accuracies": 0.9750000238418579,
|
4146 |
+
"rewards/chosen": 0.18486423790454865,
|
4147 |
+
"rewards/margins": 0.5096093416213989,
|
4148 |
+
"rewards/rejected": -0.3247450888156891,
|
4149 |
+
"step": 2750
|
4150 |
+
},
|
4151 |
+
{
|
4152 |
+
"epoch": 13.974683544303797,
|
4153 |
+
"grad_norm": 510265.43069577636,
|
4154 |
+
"learning_rate": 1.2315888436226887e-07,
|
4155 |
+
"logits/chosen": -1.1760886907577515,
|
4156 |
+
"logits/rejected": -0.8848980665206909,
|
4157 |
+
"logps/chosen": -50.471961975097656,
|
4158 |
+
"logps/rejected": -569.0016479492188,
|
4159 |
+
"loss": 15240.5234,
|
4160 |
+
"rewards/accuracies": 1.0,
|
4161 |
+
"rewards/chosen": 0.1889052391052246,
|
4162 |
+
"rewards/margins": 0.5153056383132935,
|
4163 |
+
"rewards/rejected": -0.3264002799987793,
|
4164 |
+
"step": 2760
|
4165 |
}
|
4166 |
],
|
4167 |
"logging_steps": 10,
|