cat-searcher
commited on
Commit
•
ec5f551
1
Parent(s):
cb82dde
Training in progress, epoch 28, checkpoint
Browse files- last-checkpoint/global_step5521/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5521/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5521/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5521/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5521/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5521/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5521/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5521/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5521/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step5521/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step5521/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step5521/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step5521/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step5521/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step5521/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step5521/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/model-00001-of-00002.safetensors +1 -1
- last-checkpoint/model-00002-of-00002.safetensors +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +302 -2
last-checkpoint/global_step5521/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eeb574e73c858813a928ff498dcd92efcd52d39bf6be93eb19f482e11a521d46
|
3 |
+
size 2506176112
|
last-checkpoint/global_step5521/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab8e88ff3e1e0d27d0f652203c6c8b5a774c4bf0d489719cc7fc6e9de2bb46e3
|
3 |
+
size 2506176112
|
last-checkpoint/global_step5521/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d3fb30080c8bf6107859398c3b1e15ac6ac4565566849c964afcb6f354ea1ae5
|
3 |
+
size 2506176112
|
last-checkpoint/global_step5521/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:35da7feb29692b199d80e67730abb3983caef4e1b0382cf78d411d246196f9d9
|
3 |
+
size 2506176112
|
last-checkpoint/global_step5521/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:591123dfe28879cbe098dd6659b52eaae80c7bb2771ec06d6a603b62bc63fe74
|
3 |
+
size 2506176112
|
last-checkpoint/global_step5521/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e642641e957b049f6002a70fb88a66fc527f94b80a7cd78722a8dd7875d89216
|
3 |
+
size 2506176112
|
last-checkpoint/global_step5521/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c8368bffa1785b2545bc93fc7f883c6b08555d632fc450f6d98baf24aeb7087e
|
3 |
+
size 2506176112
|
last-checkpoint/global_step5521/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:db3fd9da9c40302bee2f82faaf6129307040541c2bf197edb02c875734a4cc4e
|
3 |
+
size 2506176112
|
last-checkpoint/global_step5521/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ff0d15d674a1437beca49a16157757d5b4baf26599e5166ebed4540b1a7332e2
|
3 |
+
size 85570
|
last-checkpoint/global_step5521/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2affdfc29fcb3d39a58b25db9a58d2a0625bfe72f9991c515efa1f965b7fe8e7
|
3 |
+
size 85506
|
last-checkpoint/global_step5521/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c43225a1f5e98e8c5e00bffee1e9bfe34bcffd0cf304e1ca11f1e5b9996a51ac
|
3 |
+
size 85506
|
last-checkpoint/global_step5521/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7356cccf7500d912fc837459640a05ccf3bb6a90b783e55b70e69002bd44e7fb
|
3 |
+
size 85506
|
last-checkpoint/global_step5521/zero_pp_rank_4_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:afdc283d704de0c72ee00ca73a9a7e7d1a1476f4ee3267871f8c87791768ab0f
|
3 |
+
size 85506
|
last-checkpoint/global_step5521/zero_pp_rank_5_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ceb8cc949be997c8ca9a59cb0ce35e0178d2683d8c9003245cb86046f76d968f
|
3 |
+
size 85506
|
last-checkpoint/global_step5521/zero_pp_rank_6_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:be11c9817cfbbf742d8f6d97cd03a3c9ca846924fe4c3d497537572d49aebeae
|
3 |
+
size 85506
|
last-checkpoint/global_step5521/zero_pp_rank_7_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:744b41b26aaa23988bf6b5023d6e71f632a5c5d64c7b6d8b845694193ec91bed
|
3 |
+
size 85506
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step5521
|
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4945242264
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:49f4a9ae06898314b6bbf8ad9fca6ca16dea158bd29ecc0506863d601b8345f4
|
3 |
size 4945242264
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 67121608
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4832308f86af667638feea3c3365de459978e8c0f49e3d367cf4d5379875a12f
|
3 |
size 67121608
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:038329a940174c7998542fe9a3c903ee0c21d0a2351959a1cb53ac9af3988f89
|
3 |
size 15984
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fc2eb62cd1e31d2c95a28eaadd97a496b27751983378626efc3ee2a53ae743ff
|
3 |
size 15984
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7c76f1ee6b7bfb2e7a6ca68f028fe40297bc56fa8287959be7a51545af2a824b
|
3 |
size 15984
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3e4716e0a98e24ecef0261fa45d53aa531ce14f99b4992682e0257f7c483a80d
|
3 |
size 15984
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:732644bf9682f11fe70f041a2575d5822769847a3b56320e6cfdbd3914eb98f9
|
3 |
size 15984
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c3d90978ff8f9057d8587ec1258de11f4d5531805002e65d952f5725b93f51be
|
3 |
size 15984
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a013a277003a33e8bb2418ae66c1488282f502fc920260b5fc9f337843415ab2
|
3 |
size 15984
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b9f8887e423cbfbb140f966733dae63123515ed5345b3e075eb892b0646282c8
|
3 |
size 15984
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d2d39cfa3808e402183de5c305fcd5a81f2c9ffede37076a3856931095792cc0
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -8002,6 +8002,306 @@
|
|
8002 |
"rewards/margins": 0.5666243433952332,
|
8003 |
"rewards/rejected": -0.3455334007740021,
|
8004 |
"step": 5320
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8005 |
}
|
8006 |
],
|
8007 |
"logging_steps": 10,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 28.0,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 5521,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
8002 |
"rewards/margins": 0.5666243433952332,
|
8003 |
"rewards/rejected": -0.3455334007740021,
|
8004 |
"step": 5320
|
8005 |
+
},
|
8006 |
+
{
|
8007 |
+
"epoch": 27.03291139240506,
|
8008 |
+
"grad_norm": 306486.1159229183,
|
8009 |
+
"learning_rate": 1.380445001566907e-07,
|
8010 |
+
"logits/chosen": -0.2942148447036743,
|
8011 |
+
"logits/rejected": 0.29008275270462036,
|
8012 |
+
"logps/chosen": -28.0673770904541,
|
8013 |
+
"logps/rejected": -591.0224609375,
|
8014 |
+
"loss": 12393.8094,
|
8015 |
+
"rewards/accuracies": 1.0,
|
8016 |
+
"rewards/chosen": 0.21129322052001953,
|
8017 |
+
"rewards/margins": 0.562667965888977,
|
8018 |
+
"rewards/rejected": -0.35137468576431274,
|
8019 |
+
"step": 5330
|
8020 |
+
},
|
8021 |
+
{
|
8022 |
+
"epoch": 27.083544303797467,
|
8023 |
+
"grad_norm": 291301.0379049935,
|
8024 |
+
"learning_rate": 1.3726104669382637e-07,
|
8025 |
+
"logits/chosen": -0.04897233098745346,
|
8026 |
+
"logits/rejected": 0.2625051736831665,
|
8027 |
+
"logps/chosen": -28.288782119750977,
|
8028 |
+
"logps/rejected": -600.4498291015625,
|
8029 |
+
"loss": 12295.5109,
|
8030 |
+
"rewards/accuracies": 1.0,
|
8031 |
+
"rewards/chosen": 0.20883643627166748,
|
8032 |
+
"rewards/margins": 0.5699074864387512,
|
8033 |
+
"rewards/rejected": -0.36107105016708374,
|
8034 |
+
"step": 5340
|
8035 |
+
},
|
8036 |
+
{
|
8037 |
+
"epoch": 27.134177215189872,
|
8038 |
+
"grad_norm": 336826.5711799587,
|
8039 |
+
"learning_rate": 1.3647759323096208e-07,
|
8040 |
+
"logits/chosen": -3.574831008911133,
|
8041 |
+
"logits/rejected": -3.1615543365478516,
|
8042 |
+
"logps/chosen": -28.667476654052734,
|
8043 |
+
"logps/rejected": -610.046630859375,
|
8044 |
+
"loss": 12205.2984,
|
8045 |
+
"rewards/accuracies": 1.0,
|
8046 |
+
"rewards/chosen": 0.21462281048297882,
|
8047 |
+
"rewards/margins": 0.5811195373535156,
|
8048 |
+
"rewards/rejected": -0.3664968013763428,
|
8049 |
+
"step": 5350
|
8050 |
+
},
|
8051 |
+
{
|
8052 |
+
"epoch": 27.184810126582278,
|
8053 |
+
"grad_norm": 253108.22870561373,
|
8054 |
+
"learning_rate": 1.3569413976809776e-07,
|
8055 |
+
"logits/chosen": -1.275773048400879,
|
8056 |
+
"logits/rejected": -0.2816539406776428,
|
8057 |
+
"logps/chosen": -27.488027572631836,
|
8058 |
+
"logps/rejected": -576.04443359375,
|
8059 |
+
"loss": 12752.6922,
|
8060 |
+
"rewards/accuracies": 1.0,
|
8061 |
+
"rewards/chosen": 0.20755627751350403,
|
8062 |
+
"rewards/margins": 0.5584858059883118,
|
8063 |
+
"rewards/rejected": -0.35092949867248535,
|
8064 |
+
"step": 5360
|
8065 |
+
},
|
8066 |
+
{
|
8067 |
+
"epoch": 27.235443037974683,
|
8068 |
+
"grad_norm": 378986.1297500305,
|
8069 |
+
"learning_rate": 1.3491068630523347e-07,
|
8070 |
+
"logits/chosen": -0.7276864051818848,
|
8071 |
+
"logits/rejected": -0.2372014820575714,
|
8072 |
+
"logps/chosen": -27.652713775634766,
|
8073 |
+
"logps/rejected": -574.1866455078125,
|
8074 |
+
"loss": 12491.7875,
|
8075 |
+
"rewards/accuracies": 0.987500011920929,
|
8076 |
+
"rewards/chosen": 0.19472074508666992,
|
8077 |
+
"rewards/margins": 0.5458452701568604,
|
8078 |
+
"rewards/rejected": -0.35112449526786804,
|
8079 |
+
"step": 5370
|
8080 |
+
},
|
8081 |
+
{
|
8082 |
+
"epoch": 27.28607594936709,
|
8083 |
+
"grad_norm": 355029.2404666128,
|
8084 |
+
"learning_rate": 1.3412723284236915e-07,
|
8085 |
+
"logits/chosen": 0.03503293916583061,
|
8086 |
+
"logits/rejected": 0.09463844448328018,
|
8087 |
+
"logps/chosen": -20.01060676574707,
|
8088 |
+
"logps/rejected": -571.03369140625,
|
8089 |
+
"loss": 12906.6961,
|
8090 |
+
"rewards/accuracies": 1.0,
|
8091 |
+
"rewards/chosen": 0.1995813399553299,
|
8092 |
+
"rewards/margins": 0.5492128133773804,
|
8093 |
+
"rewards/rejected": -0.3496314287185669,
|
8094 |
+
"step": 5380
|
8095 |
+
},
|
8096 |
+
{
|
8097 |
+
"epoch": 27.336708860759494,
|
8098 |
+
"grad_norm": 174005.9141855672,
|
8099 |
+
"learning_rate": 1.3334377937950485e-07,
|
8100 |
+
"logits/chosen": -1.0307856798171997,
|
8101 |
+
"logits/rejected": -0.8787088394165039,
|
8102 |
+
"logps/chosen": -27.538768768310547,
|
8103 |
+
"logps/rejected": -584.1838989257812,
|
8104 |
+
"loss": 12431.2086,
|
8105 |
+
"rewards/accuracies": 0.987500011920929,
|
8106 |
+
"rewards/chosen": 0.20768491923809052,
|
8107 |
+
"rewards/margins": 0.558754026889801,
|
8108 |
+
"rewards/rejected": -0.3510691225528717,
|
8109 |
+
"step": 5390
|
8110 |
+
},
|
8111 |
+
{
|
8112 |
+
"epoch": 27.3873417721519,
|
8113 |
+
"grad_norm": 333107.0988957162,
|
8114 |
+
"learning_rate": 1.3256032591664053e-07,
|
8115 |
+
"logits/chosen": 0.49966010451316833,
|
8116 |
+
"logits/rejected": 1.4367059469223022,
|
8117 |
+
"logps/chosen": -22.20120620727539,
|
8118 |
+
"logps/rejected": -573.7286987304688,
|
8119 |
+
"loss": 12624.7586,
|
8120 |
+
"rewards/accuracies": 1.0,
|
8121 |
+
"rewards/chosen": 0.19961531460285187,
|
8122 |
+
"rewards/margins": 0.5520228743553162,
|
8123 |
+
"rewards/rejected": -0.35240763425827026,
|
8124 |
+
"step": 5400
|
8125 |
+
},
|
8126 |
+
{
|
8127 |
+
"epoch": 27.437974683544304,
|
8128 |
+
"grad_norm": 189125.20245582235,
|
8129 |
+
"learning_rate": 1.3177687245377624e-07,
|
8130 |
+
"logits/chosen": -0.491058886051178,
|
8131 |
+
"logits/rejected": -0.4180983603000641,
|
8132 |
+
"logps/chosen": -24.668697357177734,
|
8133 |
+
"logps/rejected": -574.7880249023438,
|
8134 |
+
"loss": 12818.4906,
|
8135 |
+
"rewards/accuracies": 1.0,
|
8136 |
+
"rewards/chosen": 0.203078955411911,
|
8137 |
+
"rewards/margins": 0.5475345849990845,
|
8138 |
+
"rewards/rejected": -0.34445568919181824,
|
8139 |
+
"step": 5410
|
8140 |
+
},
|
8141 |
+
{
|
8142 |
+
"epoch": 27.48860759493671,
|
8143 |
+
"grad_norm": 255453.1741505276,
|
8144 |
+
"learning_rate": 1.3099341899091192e-07,
|
8145 |
+
"logits/chosen": -1.3983430862426758,
|
8146 |
+
"logits/rejected": -1.0761035680770874,
|
8147 |
+
"logps/chosen": -28.14908790588379,
|
8148 |
+
"logps/rejected": -567.4022216796875,
|
8149 |
+
"loss": 12266.7156,
|
8150 |
+
"rewards/accuracies": 0.987500011920929,
|
8151 |
+
"rewards/chosen": 0.19684790074825287,
|
8152 |
+
"rewards/margins": 0.5404728055000305,
|
8153 |
+
"rewards/rejected": -0.34362491965293884,
|
8154 |
+
"step": 5420
|
8155 |
+
},
|
8156 |
+
{
|
8157 |
+
"epoch": 27.539240506329115,
|
8158 |
+
"grad_norm": 199249.17490991156,
|
8159 |
+
"learning_rate": 1.3020996552804765e-07,
|
8160 |
+
"logits/chosen": -1.3831968307495117,
|
8161 |
+
"logits/rejected": -0.9957733154296875,
|
8162 |
+
"logps/chosen": -34.38856887817383,
|
8163 |
+
"logps/rejected": -583.5364379882812,
|
8164 |
+
"loss": 12353.943,
|
8165 |
+
"rewards/accuracies": 0.987500011920929,
|
8166 |
+
"rewards/chosen": 0.20319747924804688,
|
8167 |
+
"rewards/margins": 0.5525364875793457,
|
8168 |
+
"rewards/rejected": -0.34933900833129883,
|
8169 |
+
"step": 5430
|
8170 |
+
},
|
8171 |
+
{
|
8172 |
+
"epoch": 27.58987341772152,
|
8173 |
+
"grad_norm": 372801.7448533588,
|
8174 |
+
"learning_rate": 1.2942651206518333e-07,
|
8175 |
+
"logits/chosen": 0.7253493070602417,
|
8176 |
+
"logits/rejected": 0.6416251063346863,
|
8177 |
+
"logps/chosen": -36.44821548461914,
|
8178 |
+
"logps/rejected": -557.3819580078125,
|
8179 |
+
"loss": 12762.9742,
|
8180 |
+
"rewards/accuracies": 0.9750000238418579,
|
8181 |
+
"rewards/chosen": 0.19811172783374786,
|
8182 |
+
"rewards/margins": 0.5241624116897583,
|
8183 |
+
"rewards/rejected": -0.32605066895484924,
|
8184 |
+
"step": 5440
|
8185 |
+
},
|
8186 |
+
{
|
8187 |
+
"epoch": 27.640506329113926,
|
8188 |
+
"grad_norm": 250437.30987597498,
|
8189 |
+
"learning_rate": 1.2864305860231904e-07,
|
8190 |
+
"logits/chosen": -1.6367158889770508,
|
8191 |
+
"logits/rejected": -0.9662375450134277,
|
8192 |
+
"logps/chosen": -32.858455657958984,
|
8193 |
+
"logps/rejected": -566.0185546875,
|
8194 |
+
"loss": 13013.8914,
|
8195 |
+
"rewards/accuracies": 0.987500011920929,
|
8196 |
+
"rewards/chosen": 0.20383331179618835,
|
8197 |
+
"rewards/margins": 0.5383815169334412,
|
8198 |
+
"rewards/rejected": -0.3345482349395752,
|
8199 |
+
"step": 5450
|
8200 |
+
},
|
8201 |
+
{
|
8202 |
+
"epoch": 27.691139240506327,
|
8203 |
+
"grad_norm": 395640.3468149828,
|
8204 |
+
"learning_rate": 1.2785960513945471e-07,
|
8205 |
+
"logits/chosen": -1.0696049928665161,
|
8206 |
+
"logits/rejected": -0.7029746770858765,
|
8207 |
+
"logps/chosen": -27.334697723388672,
|
8208 |
+
"logps/rejected": -572.7042846679688,
|
8209 |
+
"loss": 12608.9328,
|
8210 |
+
"rewards/accuracies": 1.0,
|
8211 |
+
"rewards/chosen": 0.2039167582988739,
|
8212 |
+
"rewards/margins": 0.5491318106651306,
|
8213 |
+
"rewards/rejected": -0.3452150225639343,
|
8214 |
+
"step": 5460
|
8215 |
+
},
|
8216 |
+
{
|
8217 |
+
"epoch": 27.741772151898733,
|
8218 |
+
"grad_norm": 737045.8738711793,
|
8219 |
+
"learning_rate": 1.2707615167659042e-07,
|
8220 |
+
"logits/chosen": -1.4398880004882812,
|
8221 |
+
"logits/rejected": -0.3085852265357971,
|
8222 |
+
"logps/chosen": -20.763835906982422,
|
8223 |
+
"logps/rejected": -557.7874755859375,
|
8224 |
+
"loss": 12662.9484,
|
8225 |
+
"rewards/accuracies": 1.0,
|
8226 |
+
"rewards/chosen": 0.20201142132282257,
|
8227 |
+
"rewards/margins": 0.5384231209754944,
|
8228 |
+
"rewards/rejected": -0.336411714553833,
|
8229 |
+
"step": 5470
|
8230 |
+
},
|
8231 |
+
{
|
8232 |
+
"epoch": 27.792405063291138,
|
8233 |
+
"grad_norm": 286929.61277431983,
|
8234 |
+
"learning_rate": 1.262926982137261e-07,
|
8235 |
+
"logits/chosen": -0.6262455582618713,
|
8236 |
+
"logits/rejected": -0.4802684783935547,
|
8237 |
+
"logps/chosen": -23.99846076965332,
|
8238 |
+
"logps/rejected": -575.8038940429688,
|
8239 |
+
"loss": 12141.5641,
|
8240 |
+
"rewards/accuracies": 0.987500011920929,
|
8241 |
+
"rewards/chosen": 0.19609448313713074,
|
8242 |
+
"rewards/margins": 0.551897406578064,
|
8243 |
+
"rewards/rejected": -0.355802983045578,
|
8244 |
+
"step": 5480
|
8245 |
+
},
|
8246 |
+
{
|
8247 |
+
"epoch": 27.843037974683543,
|
8248 |
+
"grad_norm": 749583.0814867924,
|
8249 |
+
"learning_rate": 1.255092447508618e-07,
|
8250 |
+
"logits/chosen": -1.7006927728652954,
|
8251 |
+
"logits/rejected": -1.0466101169586182,
|
8252 |
+
"logps/chosen": -29.710596084594727,
|
8253 |
+
"logps/rejected": -591.6888427734375,
|
8254 |
+
"loss": 12764.4375,
|
8255 |
+
"rewards/accuracies": 0.987500011920929,
|
8256 |
+
"rewards/chosen": 0.20634475350379944,
|
8257 |
+
"rewards/margins": 0.5570266842842102,
|
8258 |
+
"rewards/rejected": -0.3506819009780884,
|
8259 |
+
"step": 5490
|
8260 |
+
},
|
8261 |
+
{
|
8262 |
+
"epoch": 27.89367088607595,
|
8263 |
+
"grad_norm": 380933.42642122327,
|
8264 |
+
"learning_rate": 1.2472579128799749e-07,
|
8265 |
+
"logits/chosen": -1.4751110076904297,
|
8266 |
+
"logits/rejected": -0.9937122464179993,
|
8267 |
+
"logps/chosen": -34.70015335083008,
|
8268 |
+
"logps/rejected": -571.9727172851562,
|
8269 |
+
"loss": 12012.7281,
|
8270 |
+
"rewards/accuracies": 0.9750000238418579,
|
8271 |
+
"rewards/chosen": 0.2022305727005005,
|
8272 |
+
"rewards/margins": 0.5379850268363953,
|
8273 |
+
"rewards/rejected": -0.3357544541358948,
|
8274 |
+
"step": 5500
|
8275 |
+
},
|
8276 |
+
{
|
8277 |
+
"epoch": 27.944303797468354,
|
8278 |
+
"grad_norm": 258509.47313842815,
|
8279 |
+
"learning_rate": 1.2394233782513317e-07,
|
8280 |
+
"logits/chosen": -1.372650384902954,
|
8281 |
+
"logits/rejected": -1.0075037479400635,
|
8282 |
+
"logps/chosen": -35.64197540283203,
|
8283 |
+
"logps/rejected": -593.0923461914062,
|
8284 |
+
"loss": 11889.8898,
|
8285 |
+
"rewards/accuracies": 1.0,
|
8286 |
+
"rewards/chosen": 0.2117808610200882,
|
8287 |
+
"rewards/margins": 0.5591001510620117,
|
8288 |
+
"rewards/rejected": -0.34731921553611755,
|
8289 |
+
"step": 5510
|
8290 |
+
},
|
8291 |
+
{
|
8292 |
+
"epoch": 27.99493670886076,
|
8293 |
+
"grad_norm": 208938.2840249938,
|
8294 |
+
"learning_rate": 1.2315888436226887e-07,
|
8295 |
+
"logits/chosen": -2.1264805793762207,
|
8296 |
+
"logits/rejected": -1.4703245162963867,
|
8297 |
+
"logps/chosen": -32.981266021728516,
|
8298 |
+
"logps/rejected": -597.6434936523438,
|
8299 |
+
"loss": 12257.6922,
|
8300 |
+
"rewards/accuracies": 0.987500011920929,
|
8301 |
+
"rewards/chosen": 0.21187356114387512,
|
8302 |
+
"rewards/margins": 0.5627579689025879,
|
8303 |
+
"rewards/rejected": -0.3508843779563904,
|
8304 |
+
"step": 5520
|
8305 |
}
|
8306 |
],
|
8307 |
"logging_steps": 10,
|