cat-searcher
commited on
Commit
•
81fd0d3
1
Parent(s):
4c91759
Training in progress, epoch 26, checkpoint
Browse files- last-checkpoint/global_step5126/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5126/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5126/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5126/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5126/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5126/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5126/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5126/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5126/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step5126/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step5126/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step5126/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step5126/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step5126/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step5126/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step5126/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/model-00001-of-00002.safetensors +1 -1
- last-checkpoint/model-00002-of-00002.safetensors +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +302 -2
last-checkpoint/global_step5126/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1af7760daa4102e118869727094ce0fbf14cf0ed307b27fd7a19ff85ef1ed21a
|
3 |
+
size 2506176112
|
last-checkpoint/global_step5126/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f498a49895cf5e7427cb08addc7237919bba4b494e4662f68bc79562db491a54
|
3 |
+
size 2506176112
|
last-checkpoint/global_step5126/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d45eede1b9dad005d98db27ce0db0913175175b92663cfb4f85bc45dbbd00604
|
3 |
+
size 2506176112
|
last-checkpoint/global_step5126/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8d0a86fae62a16d1e788480421c675f660b32f41088a78a07cbb65a4e0e0721e
|
3 |
+
size 2506176112
|
last-checkpoint/global_step5126/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:86b033a977f0b9eb6e7524d3438e660be4cc3b071d1627f09f533fada7ecba6f
|
3 |
+
size 2506176112
|
last-checkpoint/global_step5126/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:93294787dbeb15413656d5ca75de1498bf9416ab80efbe381f748bf0a5dddb26
|
3 |
+
size 2506176112
|
last-checkpoint/global_step5126/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5d64b3b706fcc9670bc87aac98540dfdeb1369cd862a5c7efc91bafd9adb9ddc
|
3 |
+
size 2506176112
|
last-checkpoint/global_step5126/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a41f3667f22c201d6f34a6995bb4c823becbc8bc45386074e697c4c504432fc4
|
3 |
+
size 2506176112
|
last-checkpoint/global_step5126/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8bcb70e44e7823fcf90a769bebf9d9d9c0c233dea2cf642f794df31a267f9758
|
3 |
+
size 85570
|
last-checkpoint/global_step5126/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:793e32b15b99418321ce219dea2352f549f719604f1948e75215f5e62bafd89f
|
3 |
+
size 85506
|
last-checkpoint/global_step5126/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d0cc4a5d3e2a700f6e59eeb9a0579057dfcd2067f6c2f9ea7a773b80af5754cd
|
3 |
+
size 85506
|
last-checkpoint/global_step5126/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d98445b970e33eaa6fc846e2436e367d24db4520657bd50ab1a5b68db45d68b8
|
3 |
+
size 85506
|
last-checkpoint/global_step5126/zero_pp_rank_4_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:db6597a5de591d9c3267ff18a867dbf2c93a54584fea066482d5d0e8de144cc0
|
3 |
+
size 85506
|
last-checkpoint/global_step5126/zero_pp_rank_5_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2b04cc0713b1195e919fe4270b38c3b6768391ade8ecda5dd8cfc4d3c2f70819
|
3 |
+
size 85506
|
last-checkpoint/global_step5126/zero_pp_rank_6_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:74c99032ceba04a67c3ac793f3988d89e77588a122bb311d2d8386335b393870
|
3 |
+
size 85506
|
last-checkpoint/global_step5126/zero_pp_rank_7_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:01a554fb7d11eb9dd4af7f666dbc86ec128b8514437d01f2b93b6201745af5d9
|
3 |
+
size 85506
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step5126
|
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4945242264
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab3b8a6a1f14b87eddcd6889e77ca358471e584db04f6d14f50ce0ca4a94e8e2
|
3 |
size 4945242264
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 67121608
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4e312815d6589c577d3a29d19ba5e8956d7c9080646ba7b35c3708e364eb8f55
|
3 |
size 67121608
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9dd0b885528e55ec25b01a487faef7810481e858198ac24b76aedb3688770c06
|
3 |
size 15984
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a451e60f451c0ed06e4b0d619be9f7981c5af29ba16d797996e102e4d1fd7514
|
3 |
size 15984
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ff4027794d1d9c71e13291169d32d384e8f6078c931f43db354471cbc57d8639
|
3 |
size 15984
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:40925f5ac9883b8dfe22197d58f18429503331adeff91ce58e72d56b5094171a
|
3 |
size 15984
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d0890e080f98bcfb81036d2db959cc45209e8c2f67a0dccde184473488395153
|
3 |
size 15984
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d8e4b714cb76d14f84bc59d5d9ba706908caddc95de8f17bfbeb87cbce486cb3
|
3 |
size 15984
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f5c095f0a000582673860ef2dcfa50f1ba3d6bf9b31cb0a66349b60d581ecbe3
|
3 |
size 15984
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8e625b7623f260d65cb1001beba6e4d0df9ed61b3f496d3e767f280a6b73cde8
|
3 |
size 15984
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d2a6611856f3b4f599b410c5f2fa04b4cd6d782a4bd921f15735728fc381869d
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -7402,6 +7402,306 @@
|
|
7402 |
"rewards/margins": 0.5547462701797485,
|
7403 |
"rewards/rejected": -0.3539626896381378,
|
7404 |
"step": 4920
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7405 |
}
|
7406 |
],
|
7407 |
"logging_steps": 10,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 26.0,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 5126,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
7402 |
"rewards/margins": 0.5547462701797485,
|
7403 |
"rewards/rejected": -0.3539626896381378,
|
7404 |
"step": 4920
|
7405 |
+
},
|
7406 |
+
{
|
7407 |
+
"epoch": 25.00759493670886,
|
7408 |
+
"grad_norm": 308388.08709269366,
|
7409 |
+
"learning_rate": 1.6938263867126293e-07,
|
7410 |
+
"logits/chosen": -1.6532137393951416,
|
7411 |
+
"logits/rejected": -1.572850227355957,
|
7412 |
+
"logps/chosen": -41.12345886230469,
|
7413 |
+
"logps/rejected": -613.5958862304688,
|
7414 |
+
"loss": 12755.7383,
|
7415 |
+
"rewards/accuracies": 0.9750000238418579,
|
7416 |
+
"rewards/chosen": 0.21259479224681854,
|
7417 |
+
"rewards/margins": 0.5684026479721069,
|
7418 |
+
"rewards/rejected": -0.3558078408241272,
|
7419 |
+
"step": 4930
|
7420 |
+
},
|
7421 |
+
{
|
7422 |
+
"epoch": 25.058227848101264,
|
7423 |
+
"grad_norm": 320761.03886897856,
|
7424 |
+
"learning_rate": 1.685991852083986e-07,
|
7425 |
+
"logits/chosen": -0.11034099757671356,
|
7426 |
+
"logits/rejected": -0.06293153762817383,
|
7427 |
+
"logps/chosen": -34.010704040527344,
|
7428 |
+
"logps/rejected": -583.318359375,
|
7429 |
+
"loss": 13300.3922,
|
7430 |
+
"rewards/accuracies": 0.9750000238418579,
|
7431 |
+
"rewards/chosen": 0.19742931425571442,
|
7432 |
+
"rewards/margins": 0.5516862273216248,
|
7433 |
+
"rewards/rejected": -0.35425692796707153,
|
7434 |
+
"step": 4940
|
7435 |
+
},
|
7436 |
+
{
|
7437 |
+
"epoch": 25.10886075949367,
|
7438 |
+
"grad_norm": 282559.397671993,
|
7439 |
+
"learning_rate": 1.6781573174553431e-07,
|
7440 |
+
"logits/chosen": 0.5274404883384705,
|
7441 |
+
"logits/rejected": 1.2507613897323608,
|
7442 |
+
"logps/chosen": -29.299930572509766,
|
7443 |
+
"logps/rejected": -554.8450927734375,
|
7444 |
+
"loss": 12685.2523,
|
7445 |
+
"rewards/accuracies": 0.9624999761581421,
|
7446 |
+
"rewards/chosen": 0.19330081343650818,
|
7447 |
+
"rewards/margins": 0.5271843671798706,
|
7448 |
+
"rewards/rejected": -0.3338836431503296,
|
7449 |
+
"step": 4950
|
7450 |
+
},
|
7451 |
+
{
|
7452 |
+
"epoch": 25.159493670886075,
|
7453 |
+
"grad_norm": 248533.31024359175,
|
7454 |
+
"learning_rate": 1.6703227828267e-07,
|
7455 |
+
"logits/chosen": -1.2484452724456787,
|
7456 |
+
"logits/rejected": -0.5531445741653442,
|
7457 |
+
"logps/chosen": -42.44970703125,
|
7458 |
+
"logps/rejected": -591.9672241210938,
|
7459 |
+
"loss": 12525.2,
|
7460 |
+
"rewards/accuracies": 1.0,
|
7461 |
+
"rewards/chosen": 0.20557789504528046,
|
7462 |
+
"rewards/margins": 0.5516069531440735,
|
7463 |
+
"rewards/rejected": -0.34602901339530945,
|
7464 |
+
"step": 4960
|
7465 |
+
},
|
7466 |
+
{
|
7467 |
+
"epoch": 25.21012658227848,
|
7468 |
+
"grad_norm": 365840.3682606488,
|
7469 |
+
"learning_rate": 1.662488248198057e-07,
|
7470 |
+
"logits/chosen": -1.5047721862792969,
|
7471 |
+
"logits/rejected": -1.5158735513687134,
|
7472 |
+
"logps/chosen": -31.838958740234375,
|
7473 |
+
"logps/rejected": -581.0045166015625,
|
7474 |
+
"loss": 13041.882,
|
7475 |
+
"rewards/accuracies": 0.9750000238418579,
|
7476 |
+
"rewards/chosen": 0.201541468501091,
|
7477 |
+
"rewards/margins": 0.5492666959762573,
|
7478 |
+
"rewards/rejected": -0.3477252125740051,
|
7479 |
+
"step": 4970
|
7480 |
+
},
|
7481 |
+
{
|
7482 |
+
"epoch": 25.260759493670886,
|
7483 |
+
"grad_norm": 364119.66442401055,
|
7484 |
+
"learning_rate": 1.6546537135694138e-07,
|
7485 |
+
"logits/chosen": -2.0333914756774902,
|
7486 |
+
"logits/rejected": -2.0420191287994385,
|
7487 |
+
"logps/chosen": -33.426788330078125,
|
7488 |
+
"logps/rejected": -577.18212890625,
|
7489 |
+
"loss": 13218.8875,
|
7490 |
+
"rewards/accuracies": 1.0,
|
7491 |
+
"rewards/chosen": 0.20231468975543976,
|
7492 |
+
"rewards/margins": 0.5456961989402771,
|
7493 |
+
"rewards/rejected": -0.3433815836906433,
|
7494 |
+
"step": 4980
|
7495 |
+
},
|
7496 |
+
{
|
7497 |
+
"epoch": 25.31139240506329,
|
7498 |
+
"grad_norm": 434691.5380135347,
|
7499 |
+
"learning_rate": 1.6468191789407709e-07,
|
7500 |
+
"logits/chosen": -0.23437795042991638,
|
7501 |
+
"logits/rejected": -0.03313719108700752,
|
7502 |
+
"logps/chosen": -33.025386810302734,
|
7503 |
+
"logps/rejected": -587.5833740234375,
|
7504 |
+
"loss": 12003.9711,
|
7505 |
+
"rewards/accuracies": 0.987500011920929,
|
7506 |
+
"rewards/chosen": 0.19747456908226013,
|
7507 |
+
"rewards/margins": 0.553167998790741,
|
7508 |
+
"rewards/rejected": -0.3556934595108032,
|
7509 |
+
"step": 4990
|
7510 |
+
},
|
7511 |
+
{
|
7512 |
+
"epoch": 25.362025316455696,
|
7513 |
+
"grad_norm": 257881.6224659914,
|
7514 |
+
"learning_rate": 1.6389846443121277e-07,
|
7515 |
+
"logits/chosen": 1.229998230934143,
|
7516 |
+
"logits/rejected": 1.8426265716552734,
|
7517 |
+
"logps/chosen": -31.151538848876953,
|
7518 |
+
"logps/rejected": -575.4852905273438,
|
7519 |
+
"loss": 13412.7078,
|
7520 |
+
"rewards/accuracies": 0.987500011920929,
|
7521 |
+
"rewards/chosen": 0.1954251229763031,
|
7522 |
+
"rewards/margins": 0.5429095029830933,
|
7523 |
+
"rewards/rejected": -0.34748440980911255,
|
7524 |
+
"step": 5000
|
7525 |
+
},
|
7526 |
+
{
|
7527 |
+
"epoch": 25.4126582278481,
|
7528 |
+
"grad_norm": 425285.73032920854,
|
7529 |
+
"learning_rate": 1.6311501096834847e-07,
|
7530 |
+
"logits/chosen": -1.241003155708313,
|
7531 |
+
"logits/rejected": -0.7176898121833801,
|
7532 |
+
"logps/chosen": -31.115795135498047,
|
7533 |
+
"logps/rejected": -558.19873046875,
|
7534 |
+
"loss": 13301.7094,
|
7535 |
+
"rewards/accuracies": 0.9624999761581421,
|
7536 |
+
"rewards/chosen": 0.19612053036689758,
|
7537 |
+
"rewards/margins": 0.5247890949249268,
|
7538 |
+
"rewards/rejected": -0.3286685347557068,
|
7539 |
+
"step": 5010
|
7540 |
+
},
|
7541 |
+
{
|
7542 |
+
"epoch": 25.463291139240507,
|
7543 |
+
"grad_norm": 372695.4381119174,
|
7544 |
+
"learning_rate": 1.6233155750548415e-07,
|
7545 |
+
"logits/chosen": -1.8982555866241455,
|
7546 |
+
"logits/rejected": -1.494901180267334,
|
7547 |
+
"logps/chosen": -28.403858184814453,
|
7548 |
+
"logps/rejected": -562.348388671875,
|
7549 |
+
"loss": 13093.6797,
|
7550 |
+
"rewards/accuracies": 1.0,
|
7551 |
+
"rewards/chosen": 0.203691765666008,
|
7552 |
+
"rewards/margins": 0.5354448556900024,
|
7553 |
+
"rewards/rejected": -0.33175310492515564,
|
7554 |
+
"step": 5020
|
7555 |
+
},
|
7556 |
+
{
|
7557 |
+
"epoch": 25.513924050632912,
|
7558 |
+
"grad_norm": 291137.30920257524,
|
7559 |
+
"learning_rate": 1.6154810404261986e-07,
|
7560 |
+
"logits/chosen": -0.2861802577972412,
|
7561 |
+
"logits/rejected": -0.4479186534881592,
|
7562 |
+
"logps/chosen": -23.825702667236328,
|
7563 |
+
"logps/rejected": -559.0096435546875,
|
7564 |
+
"loss": 12589.4609,
|
7565 |
+
"rewards/accuracies": 0.987500011920929,
|
7566 |
+
"rewards/chosen": 0.1913156658411026,
|
7567 |
+
"rewards/margins": 0.5378258228302002,
|
7568 |
+
"rewards/rejected": -0.346510112285614,
|
7569 |
+
"step": 5030
|
7570 |
+
},
|
7571 |
+
{
|
7572 |
+
"epoch": 25.564556962025318,
|
7573 |
+
"grad_norm": 273297.2570355529,
|
7574 |
+
"learning_rate": 1.6076465057975556e-07,
|
7575 |
+
"logits/chosen": -2.0077948570251465,
|
7576 |
+
"logits/rejected": -1.546903371810913,
|
7577 |
+
"logps/chosen": -34.178993225097656,
|
7578 |
+
"logps/rejected": -599.1771240234375,
|
7579 |
+
"loss": 12277.0906,
|
7580 |
+
"rewards/accuracies": 1.0,
|
7581 |
+
"rewards/chosen": 0.20620949566364288,
|
7582 |
+
"rewards/margins": 0.5666217803955078,
|
7583 |
+
"rewards/rejected": -0.36041226983070374,
|
7584 |
+
"step": 5040
|
7585 |
+
},
|
7586 |
+
{
|
7587 |
+
"epoch": 25.615189873417723,
|
7588 |
+
"grad_norm": 287331.7702661688,
|
7589 |
+
"learning_rate": 1.5998119711689127e-07,
|
7590 |
+
"logits/chosen": -0.9829635620117188,
|
7591 |
+
"logits/rejected": -0.3811960220336914,
|
7592 |
+
"logps/chosen": -32.14269256591797,
|
7593 |
+
"logps/rejected": -580.4415283203125,
|
7594 |
+
"loss": 12507.3219,
|
7595 |
+
"rewards/accuracies": 1.0,
|
7596 |
+
"rewards/chosen": 0.20907440781593323,
|
7597 |
+
"rewards/margins": 0.5523373484611511,
|
7598 |
+
"rewards/rejected": -0.3432629406452179,
|
7599 |
+
"step": 5050
|
7600 |
+
},
|
7601 |
+
{
|
7602 |
+
"epoch": 25.665822784810125,
|
7603 |
+
"grad_norm": 896554.0294317787,
|
7604 |
+
"learning_rate": 1.5919774365402695e-07,
|
7605 |
+
"logits/chosen": -1.3259598016738892,
|
7606 |
+
"logits/rejected": -0.9525947570800781,
|
7607 |
+
"logps/chosen": -25.666656494140625,
|
7608 |
+
"logps/rejected": -573.1832885742188,
|
7609 |
+
"loss": 12955.9469,
|
7610 |
+
"rewards/accuracies": 1.0,
|
7611 |
+
"rewards/chosen": 0.19553272426128387,
|
7612 |
+
"rewards/margins": 0.5421277284622192,
|
7613 |
+
"rewards/rejected": -0.3465949594974518,
|
7614 |
+
"step": 5060
|
7615 |
+
},
|
7616 |
+
{
|
7617 |
+
"epoch": 25.71645569620253,
|
7618 |
+
"grad_norm": 360559.08966435614,
|
7619 |
+
"learning_rate": 1.5841429019116266e-07,
|
7620 |
+
"logits/chosen": -2.50518536567688,
|
7621 |
+
"logits/rejected": -2.6326870918273926,
|
7622 |
+
"logps/chosen": -40.73974609375,
|
7623 |
+
"logps/rejected": -598.9993896484375,
|
7624 |
+
"loss": 13192.7609,
|
7625 |
+
"rewards/accuracies": 1.0,
|
7626 |
+
"rewards/chosen": 0.2110958993434906,
|
7627 |
+
"rewards/margins": 0.559829592704773,
|
7628 |
+
"rewards/rejected": -0.34873366355895996,
|
7629 |
+
"step": 5070
|
7630 |
+
},
|
7631 |
+
{
|
7632 |
+
"epoch": 25.767088607594935,
|
7633 |
+
"grad_norm": 354200.8480985467,
|
7634 |
+
"learning_rate": 1.5763083672829833e-07,
|
7635 |
+
"logits/chosen": 0.24985246360301971,
|
7636 |
+
"logits/rejected": 0.11640717834234238,
|
7637 |
+
"logps/chosen": -30.384597778320312,
|
7638 |
+
"logps/rejected": -595.9378662109375,
|
7639 |
+
"loss": 13357.8156,
|
7640 |
+
"rewards/accuracies": 1.0,
|
7641 |
+
"rewards/chosen": 0.20246371626853943,
|
7642 |
+
"rewards/margins": 0.5666370391845703,
|
7643 |
+
"rewards/rejected": -0.3641732633113861,
|
7644 |
+
"step": 5080
|
7645 |
+
},
|
7646 |
+
{
|
7647 |
+
"epoch": 25.81772151898734,
|
7648 |
+
"grad_norm": 419630.4907858681,
|
7649 |
+
"learning_rate": 1.5684738326543404e-07,
|
7650 |
+
"logits/chosen": -2.382422924041748,
|
7651 |
+
"logits/rejected": -1.6780860424041748,
|
7652 |
+
"logps/chosen": -32.89704132080078,
|
7653 |
+
"logps/rejected": -596.2845458984375,
|
7654 |
+
"loss": 13075.125,
|
7655 |
+
"rewards/accuracies": 1.0,
|
7656 |
+
"rewards/chosen": 0.20516617596149445,
|
7657 |
+
"rewards/margins": 0.5647061467170715,
|
7658 |
+
"rewards/rejected": -0.3595399558544159,
|
7659 |
+
"step": 5090
|
7660 |
+
},
|
7661 |
+
{
|
7662 |
+
"epoch": 25.868354430379746,
|
7663 |
+
"grad_norm": 239893.19190802056,
|
7664 |
+
"learning_rate": 1.5606392980256972e-07,
|
7665 |
+
"logits/chosen": -1.5904518365859985,
|
7666 |
+
"logits/rejected": -1.162544846534729,
|
7667 |
+
"logps/chosen": -29.703998565673828,
|
7668 |
+
"logps/rejected": -562.6304321289062,
|
7669 |
+
"loss": 12907.7898,
|
7670 |
+
"rewards/accuracies": 1.0,
|
7671 |
+
"rewards/chosen": 0.1967582404613495,
|
7672 |
+
"rewards/margins": 0.5360093116760254,
|
7673 |
+
"rewards/rejected": -0.3392511010169983,
|
7674 |
+
"step": 5100
|
7675 |
+
},
|
7676 |
+
{
|
7677 |
+
"epoch": 25.91898734177215,
|
7678 |
+
"grad_norm": 2769163.91672907,
|
7679 |
+
"learning_rate": 1.5528047633970543e-07,
|
7680 |
+
"logits/chosen": -0.4542008936405182,
|
7681 |
+
"logits/rejected": 0.3750479519367218,
|
7682 |
+
"logps/chosen": -40.263450622558594,
|
7683 |
+
"logps/rejected": -569.8021240234375,
|
7684 |
+
"loss": 12356.1203,
|
7685 |
+
"rewards/accuracies": 0.9750000238418579,
|
7686 |
+
"rewards/chosen": 0.19747862219810486,
|
7687 |
+
"rewards/margins": 0.5331242680549622,
|
7688 |
+
"rewards/rejected": -0.3356456160545349,
|
7689 |
+
"step": 5110
|
7690 |
+
},
|
7691 |
+
{
|
7692 |
+
"epoch": 25.969620253164557,
|
7693 |
+
"grad_norm": 414959.45582905615,
|
7694 |
+
"learning_rate": 1.544970228768411e-07,
|
7695 |
+
"logits/chosen": -2.780273914337158,
|
7696 |
+
"logits/rejected": -2.477725028991699,
|
7697 |
+
"logps/chosen": -34.733909606933594,
|
7698 |
+
"logps/rejected": -598.5794677734375,
|
7699 |
+
"loss": 12866.1969,
|
7700 |
+
"rewards/accuracies": 1.0,
|
7701 |
+
"rewards/chosen": 0.22083155810832977,
|
7702 |
+
"rewards/margins": 0.5664650797843933,
|
7703 |
+
"rewards/rejected": -0.3456335663795471,
|
7704 |
+
"step": 5120
|
7705 |
}
|
7706 |
],
|
7707 |
"logging_steps": 10,
|