Upload folder using huggingface_hub
Browse files- model-00001-of-00002.safetensors +1 -1
- model-00002-of-00002.safetensors +1 -1
- optimizer.pt +1 -1
- scheduler.pt +1 -1
- trainer_state.json +1873 -3
model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4996670464
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a2913558abbf7c09f823f3d9149b0c3f3f8f8bcccc9b292c0095a5ac58c81928
|
3 |
size 4996670464
|
model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1178224960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6d1dd526bd7bbd181272b566e5c7e236590ff2c966e830278b4d1a8b80e28701
|
3 |
size 1178224960
|
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3094642882
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:635d436cc8d5e7a2a2efe1ed2a6519275a69bf2704b79004c45ffe40c45b243f
|
3 |
size 3094642882
|
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8596e8e763631f6218be973d23c17f87c5de1b18960b384dbd7a76dc1dcbe692
|
3 |
size 1064
|
trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 5000,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1367,6 +1367,1876 @@
|
|
1367 |
"rewards/rejected": -0.49420568346977234,
|
1368 |
"sft_loss": 0.557674765586853,
|
1369 |
"step": 800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1370 |
}
|
1371 |
],
|
1372 |
"logging_steps": 10,
|
@@ -1386,7 +3256,7 @@
|
|
1386 |
"attributes": {}
|
1387 |
}
|
1388 |
},
|
1389 |
-
"total_flos":
|
1390 |
"train_batch_size": 2,
|
1391 |
"trial_name": null,
|
1392 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.20013254392163232,
|
5 |
"eval_steps": 5000,
|
6 |
+
"global_step": 1900,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1367 |
"rewards/rejected": -0.49420568346977234,
|
1368 |
"sft_loss": 0.557674765586853,
|
1369 |
"step": 800
|
1370 |
+
},
|
1371 |
+
{
|
1372 |
+
"epoch": 0.08531966346132745,
|
1373 |
+
"grad_norm": 5.111828327178955,
|
1374 |
+
"learning_rate": 0.0004263157894736842,
|
1375 |
+
"logits/chosen": -4.80694580078125,
|
1376 |
+
"logits/rejected": -4.806312561035156,
|
1377 |
+
"logps/chosen": -0.7565589547157288,
|
1378 |
+
"logps/rejected": -5.1447882652282715,
|
1379 |
+
"loss": 0.791,
|
1380 |
+
"odds_ratio_loss": 2.5595006942749023,
|
1381 |
+
"rewards/accuracies": 0.862500011920929,
|
1382 |
+
"rewards/chosen": -0.07565589994192123,
|
1383 |
+
"rewards/margins": 0.4388229250907898,
|
1384 |
+
"rewards/rejected": -0.514478862285614,
|
1385 |
+
"sft_loss": 0.5350964665412903,
|
1386 |
+
"step": 810
|
1387 |
+
},
|
1388 |
+
{
|
1389 |
+
"epoch": 0.08637299263986237,
|
1390 |
+
"grad_norm": 4.786795616149902,
|
1391 |
+
"learning_rate": 0.0004315789473684211,
|
1392 |
+
"logits/chosen": -4.826291561126709,
|
1393 |
+
"logits/rejected": -4.825577259063721,
|
1394 |
+
"logps/chosen": -0.6888704895973206,
|
1395 |
+
"logps/rejected": -5.567938804626465,
|
1396 |
+
"loss": 0.7202,
|
1397 |
+
"odds_ratio_loss": 2.4763171672821045,
|
1398 |
+
"rewards/accuracies": 0.8541666865348816,
|
1399 |
+
"rewards/chosen": -0.06888704746961594,
|
1400 |
+
"rewards/margins": 0.487906813621521,
|
1401 |
+
"rewards/rejected": -0.5567939281463623,
|
1402 |
+
"sft_loss": 0.472540020942688,
|
1403 |
+
"step": 820
|
1404 |
+
},
|
1405 |
+
{
|
1406 |
+
"epoch": 0.08742632181839727,
|
1407 |
+
"grad_norm": 7.63191556930542,
|
1408 |
+
"learning_rate": 0.00043684210526315795,
|
1409 |
+
"logits/chosen": -4.981302738189697,
|
1410 |
+
"logits/rejected": -4.980616569519043,
|
1411 |
+
"logps/chosen": -0.7095519304275513,
|
1412 |
+
"logps/rejected": -5.726536273956299,
|
1413 |
+
"loss": 0.7455,
|
1414 |
+
"odds_ratio_loss": 2.7154994010925293,
|
1415 |
+
"rewards/accuracies": 0.8458333611488342,
|
1416 |
+
"rewards/chosen": -0.07095518708229065,
|
1417 |
+
"rewards/margins": 0.5016984343528748,
|
1418 |
+
"rewards/rejected": -0.5726536512374878,
|
1419 |
+
"sft_loss": 0.4739212989807129,
|
1420 |
+
"step": 830
|
1421 |
+
},
|
1422 |
+
{
|
1423 |
+
"epoch": 0.08847965099693218,
|
1424 |
+
"grad_norm": 3.9395651817321777,
|
1425 |
+
"learning_rate": 0.0004421052631578947,
|
1426 |
+
"logits/chosen": -5.055291652679443,
|
1427 |
+
"logits/rejected": -5.054480075836182,
|
1428 |
+
"logps/chosen": -0.6979976892471313,
|
1429 |
+
"logps/rejected": -5.470063209533691,
|
1430 |
+
"loss": 0.7345,
|
1431 |
+
"odds_ratio_loss": 2.5650830268859863,
|
1432 |
+
"rewards/accuracies": 0.8354166746139526,
|
1433 |
+
"rewards/chosen": -0.0697997659444809,
|
1434 |
+
"rewards/margins": 0.4772065579891205,
|
1435 |
+
"rewards/rejected": -0.547006368637085,
|
1436 |
+
"sft_loss": 0.477975457906723,
|
1437 |
+
"step": 840
|
1438 |
+
},
|
1439 |
+
{
|
1440 |
+
"epoch": 0.08953298017546708,
|
1441 |
+
"grad_norm": 9.380526542663574,
|
1442 |
+
"learning_rate": 0.0004473684210526316,
|
1443 |
+
"logits/chosen": -4.9736008644104,
|
1444 |
+
"logits/rejected": -4.972882270812988,
|
1445 |
+
"logps/chosen": -0.6816462874412537,
|
1446 |
+
"logps/rejected": -5.888026714324951,
|
1447 |
+
"loss": 0.715,
|
1448 |
+
"odds_ratio_loss": 2.5402190685272217,
|
1449 |
+
"rewards/accuracies": 0.8583333492279053,
|
1450 |
+
"rewards/chosen": -0.0681646317243576,
|
1451 |
+
"rewards/margins": 0.520638108253479,
|
1452 |
+
"rewards/rejected": -0.588802695274353,
|
1453 |
+
"sft_loss": 0.46102267503738403,
|
1454 |
+
"step": 850
|
1455 |
+
},
|
1456 |
+
{
|
1457 |
+
"epoch": 0.090586309354002,
|
1458 |
+
"grad_norm": 3.032940626144409,
|
1459 |
+
"learning_rate": 0.00045263157894736845,
|
1460 |
+
"logits/chosen": -4.86979866027832,
|
1461 |
+
"logits/rejected": -4.869546413421631,
|
1462 |
+
"logps/chosen": -0.8415181636810303,
|
1463 |
+
"logps/rejected": -4.487551689147949,
|
1464 |
+
"loss": 0.8781,
|
1465 |
+
"odds_ratio_loss": 2.631443977355957,
|
1466 |
+
"rewards/accuracies": 0.8229166865348816,
|
1467 |
+
"rewards/chosen": -0.08415181934833527,
|
1468 |
+
"rewards/margins": 0.3646034300327301,
|
1469 |
+
"rewards/rejected": -0.44875526428222656,
|
1470 |
+
"sft_loss": 0.6149870157241821,
|
1471 |
+
"step": 860
|
1472 |
+
},
|
1473 |
+
{
|
1474 |
+
"epoch": 0.0916396385325369,
|
1475 |
+
"grad_norm": 5.457218170166016,
|
1476 |
+
"learning_rate": 0.00045789473684210527,
|
1477 |
+
"logits/chosen": -4.5640482902526855,
|
1478 |
+
"logits/rejected": -4.5640788078308105,
|
1479 |
+
"logps/chosen": -0.7579687833786011,
|
1480 |
+
"logps/rejected": -3.3095312118530273,
|
1481 |
+
"loss": 0.7946,
|
1482 |
+
"odds_ratio_loss": 2.6955649852752686,
|
1483 |
+
"rewards/accuracies": 0.831250011920929,
|
1484 |
+
"rewards/chosen": -0.07579687237739563,
|
1485 |
+
"rewards/margins": 0.2551562488079071,
|
1486 |
+
"rewards/rejected": -0.33095312118530273,
|
1487 |
+
"sft_loss": 0.5250447988510132,
|
1488 |
+
"step": 870
|
1489 |
+
},
|
1490 |
+
{
|
1491 |
+
"epoch": 0.0926929677110718,
|
1492 |
+
"grad_norm": 4.475607872009277,
|
1493 |
+
"learning_rate": 0.00046315789473684214,
|
1494 |
+
"logits/chosen": -4.721373558044434,
|
1495 |
+
"logits/rejected": -4.7214860916137695,
|
1496 |
+
"logps/chosen": -0.7569971680641174,
|
1497 |
+
"logps/rejected": -3.347615957260132,
|
1498 |
+
"loss": 0.7905,
|
1499 |
+
"odds_ratio_loss": 2.5438392162323,
|
1500 |
+
"rewards/accuracies": 0.831250011920929,
|
1501 |
+
"rewards/chosen": -0.07569971680641174,
|
1502 |
+
"rewards/margins": 0.25906190276145935,
|
1503 |
+
"rewards/rejected": -0.3347616195678711,
|
1504 |
+
"sft_loss": 0.536092221736908,
|
1505 |
+
"step": 880
|
1506 |
+
},
|
1507 |
+
{
|
1508 |
+
"epoch": 0.09374629688960671,
|
1509 |
+
"grad_norm": 31.67135238647461,
|
1510 |
+
"learning_rate": 0.00046842105263157895,
|
1511 |
+
"logits/chosen": -4.7270989418029785,
|
1512 |
+
"logits/rejected": -4.727247714996338,
|
1513 |
+
"logps/chosen": -0.7946822047233582,
|
1514 |
+
"logps/rejected": -3.15295147895813,
|
1515 |
+
"loss": 0.829,
|
1516 |
+
"odds_ratio_loss": 2.443174123764038,
|
1517 |
+
"rewards/accuracies": 0.8520833253860474,
|
1518 |
+
"rewards/chosen": -0.07946821302175522,
|
1519 |
+
"rewards/margins": 0.23582692444324493,
|
1520 |
+
"rewards/rejected": -0.31529513001441956,
|
1521 |
+
"sft_loss": 0.5846543908119202,
|
1522 |
+
"step": 890
|
1523 |
+
},
|
1524 |
+
{
|
1525 |
+
"epoch": 0.09479962606814162,
|
1526 |
+
"grad_norm": 3.2339320182800293,
|
1527 |
+
"learning_rate": 0.00047368421052631577,
|
1528 |
+
"logits/chosen": -5.0870866775512695,
|
1529 |
+
"logits/rejected": -5.087241172790527,
|
1530 |
+
"logps/chosen": -0.6878632307052612,
|
1531 |
+
"logps/rejected": -2.8736085891723633,
|
1532 |
+
"loss": 0.7248,
|
1533 |
+
"odds_ratio_loss": 2.5279555320739746,
|
1534 |
+
"rewards/accuracies": 0.862500011920929,
|
1535 |
+
"rewards/chosen": -0.06878631561994553,
|
1536 |
+
"rewards/margins": 0.21857453882694244,
|
1537 |
+
"rewards/rejected": -0.2873608469963074,
|
1538 |
+
"sft_loss": 0.47198787331581116,
|
1539 |
+
"step": 900
|
1540 |
+
},
|
1541 |
+
{
|
1542 |
+
"epoch": 0.09585295524667653,
|
1543 |
+
"grad_norm": 2.4642884731292725,
|
1544 |
+
"learning_rate": 0.00047894736842105264,
|
1545 |
+
"logits/chosen": -5.1205644607543945,
|
1546 |
+
"logits/rejected": -5.120718479156494,
|
1547 |
+
"logps/chosen": -0.6843523383140564,
|
1548 |
+
"logps/rejected": -3.210320472717285,
|
1549 |
+
"loss": 0.7126,
|
1550 |
+
"odds_ratio_loss": 2.3976242542266846,
|
1551 |
+
"rewards/accuracies": 0.8687499761581421,
|
1552 |
+
"rewards/chosen": -0.06843523681163788,
|
1553 |
+
"rewards/margins": 0.25259679555892944,
|
1554 |
+
"rewards/rejected": -0.3210320770740509,
|
1555 |
+
"sft_loss": 0.47283756732940674,
|
1556 |
+
"step": 910
|
1557 |
+
},
|
1558 |
+
{
|
1559 |
+
"epoch": 0.09690628442521143,
|
1560 |
+
"grad_norm": 5.920956611633301,
|
1561 |
+
"learning_rate": 0.0004842105263157895,
|
1562 |
+
"logits/chosen": -5.090200424194336,
|
1563 |
+
"logits/rejected": -5.090282917022705,
|
1564 |
+
"logps/chosen": -0.6722908616065979,
|
1565 |
+
"logps/rejected": -3.273742198944092,
|
1566 |
+
"loss": 0.7063,
|
1567 |
+
"odds_ratio_loss": 2.530402421951294,
|
1568 |
+
"rewards/accuracies": 0.84375,
|
1569 |
+
"rewards/chosen": -0.06722908467054367,
|
1570 |
+
"rewards/margins": 0.2601451575756073,
|
1571 |
+
"rewards/rejected": -0.3273741900920868,
|
1572 |
+
"sft_loss": 0.45327988266944885,
|
1573 |
+
"step": 920
|
1574 |
+
},
|
1575 |
+
{
|
1576 |
+
"epoch": 0.09795961360374635,
|
1577 |
+
"grad_norm": 4.960987567901611,
|
1578 |
+
"learning_rate": 0.0004894736842105264,
|
1579 |
+
"logits/chosen": -5.087591648101807,
|
1580 |
+
"logits/rejected": -5.087601661682129,
|
1581 |
+
"logps/chosen": -0.7244377136230469,
|
1582 |
+
"logps/rejected": -3.0884244441986084,
|
1583 |
+
"loss": 0.7571,
|
1584 |
+
"odds_ratio_loss": 2.6207199096679688,
|
1585 |
+
"rewards/accuracies": 0.8500000238418579,
|
1586 |
+
"rewards/chosen": -0.07244376838207245,
|
1587 |
+
"rewards/margins": 0.23639869689941406,
|
1588 |
+
"rewards/rejected": -0.3088424801826477,
|
1589 |
+
"sft_loss": 0.4949897825717926,
|
1590 |
+
"step": 930
|
1591 |
+
},
|
1592 |
+
{
|
1593 |
+
"epoch": 0.09901294278228125,
|
1594 |
+
"grad_norm": 5.539714336395264,
|
1595 |
+
"learning_rate": 0.0004947368421052632,
|
1596 |
+
"logits/chosen": -5.29771614074707,
|
1597 |
+
"logits/rejected": -5.297707557678223,
|
1598 |
+
"logps/chosen": -0.6988152265548706,
|
1599 |
+
"logps/rejected": -3.315657615661621,
|
1600 |
+
"loss": 0.7308,
|
1601 |
+
"odds_ratio_loss": 2.3126912117004395,
|
1602 |
+
"rewards/accuracies": 0.8645833134651184,
|
1603 |
+
"rewards/chosen": -0.06988153606653214,
|
1604 |
+
"rewards/margins": 0.26168423891067505,
|
1605 |
+
"rewards/rejected": -0.3315657675266266,
|
1606 |
+
"sft_loss": 0.4995124638080597,
|
1607 |
+
"step": 940
|
1608 |
+
},
|
1609 |
+
{
|
1610 |
+
"epoch": 0.10006627196081616,
|
1611 |
+
"grad_norm": 6.586909294128418,
|
1612 |
+
"learning_rate": 0.0005,
|
1613 |
+
"logits/chosen": -5.285855293273926,
|
1614 |
+
"logits/rejected": -5.2858428955078125,
|
1615 |
+
"logps/chosen": -0.6867055296897888,
|
1616 |
+
"logps/rejected": -3.43638277053833,
|
1617 |
+
"loss": 0.7161,
|
1618 |
+
"odds_ratio_loss": 2.5117852687835693,
|
1619 |
+
"rewards/accuracies": 0.8791666626930237,
|
1620 |
+
"rewards/chosen": -0.06867055594921112,
|
1621 |
+
"rewards/margins": 0.2749677002429962,
|
1622 |
+
"rewards/rejected": -0.34363824129104614,
|
1623 |
+
"sft_loss": 0.4649271070957184,
|
1624 |
+
"step": 950
|
1625 |
+
},
|
1626 |
+
{
|
1627 |
+
"epoch": 0.10111960113935106,
|
1628 |
+
"grad_norm": 6.756776809692383,
|
1629 |
+
"learning_rate": 0.0004999983096040005,
|
1630 |
+
"logits/chosen": -5.550118923187256,
|
1631 |
+
"logits/rejected": -5.5501179695129395,
|
1632 |
+
"logps/chosen": -0.7224279642105103,
|
1633 |
+
"logps/rejected": -3.376439332962036,
|
1634 |
+
"loss": 0.7555,
|
1635 |
+
"odds_ratio_loss": 2.5149753093719482,
|
1636 |
+
"rewards/accuracies": 0.8604166507720947,
|
1637 |
+
"rewards/chosen": -0.07224280387163162,
|
1638 |
+
"rewards/margins": 0.2654011845588684,
|
1639 |
+
"rewards/rejected": -0.3376440107822418,
|
1640 |
+
"sft_loss": 0.5040432214736938,
|
1641 |
+
"step": 960
|
1642 |
+
},
|
1643 |
+
{
|
1644 |
+
"epoch": 0.10217293031788596,
|
1645 |
+
"grad_norm": 161.19760131835938,
|
1646 |
+
"learning_rate": 0.0004999932384388613,
|
1647 |
+
"logits/chosen": -5.225105285644531,
|
1648 |
+
"logits/rejected": -5.225213527679443,
|
1649 |
+
"logps/chosen": -1.5466647148132324,
|
1650 |
+
"logps/rejected": -3.935602903366089,
|
1651 |
+
"loss": 1.5839,
|
1652 |
+
"odds_ratio_loss": 3.059565782546997,
|
1653 |
+
"rewards/accuracies": 0.8604166507720947,
|
1654 |
+
"rewards/chosen": -0.154666468501091,
|
1655 |
+
"rewards/margins": 0.2388937920331955,
|
1656 |
+
"rewards/rejected": -0.3935602605342865,
|
1657 |
+
"sft_loss": 1.277909755706787,
|
1658 |
+
"step": 970
|
1659 |
+
},
|
1660 |
+
{
|
1661 |
+
"epoch": 0.10322625949642088,
|
1662 |
+
"grad_norm": 4.612696170806885,
|
1663 |
+
"learning_rate": 0.000499984786573161,
|
1664 |
+
"logits/chosen": -5.253983020782471,
|
1665 |
+
"logits/rejected": -5.254087448120117,
|
1666 |
+
"logps/chosen": -0.6865792870521545,
|
1667 |
+
"logps/rejected": -3.219088077545166,
|
1668 |
+
"loss": 0.7214,
|
1669 |
+
"odds_ratio_loss": 2.43359375,
|
1670 |
+
"rewards/accuracies": 0.8166666626930237,
|
1671 |
+
"rewards/chosen": -0.06865792721509933,
|
1672 |
+
"rewards/margins": 0.2532508671283722,
|
1673 |
+
"rewards/rejected": -0.3219088315963745,
|
1674 |
+
"sft_loss": 0.4780765473842621,
|
1675 |
+
"step": 980
|
1676 |
+
},
|
1677 |
+
{
|
1678 |
+
"epoch": 0.10427958867495578,
|
1679 |
+
"grad_norm": 4.7582621574401855,
|
1680 |
+
"learning_rate": 0.0004999729541211952,
|
1681 |
+
"logits/chosen": -5.1987152099609375,
|
1682 |
+
"logits/rejected": -5.198534965515137,
|
1683 |
+
"logps/chosen": -0.8549334406852722,
|
1684 |
+
"logps/rejected": -4.291516304016113,
|
1685 |
+
"loss": 0.8956,
|
1686 |
+
"odds_ratio_loss": 2.6601970195770264,
|
1687 |
+
"rewards/accuracies": 0.8125,
|
1688 |
+
"rewards/chosen": -0.08549333363771439,
|
1689 |
+
"rewards/margins": 0.3436582684516907,
|
1690 |
+
"rewards/rejected": -0.42915162444114685,
|
1691 |
+
"sft_loss": 0.6296234726905823,
|
1692 |
+
"step": 990
|
1693 |
+
},
|
1694 |
+
{
|
1695 |
+
"epoch": 0.10533291785349069,
|
1696 |
+
"grad_norm": 5.924813747406006,
|
1697 |
+
"learning_rate": 0.0004999577412429764,
|
1698 |
+
"logits/chosen": -5.115817070007324,
|
1699 |
+
"logits/rejected": -5.115783214569092,
|
1700 |
+
"logps/chosen": -0.6959132552146912,
|
1701 |
+
"logps/rejected": -3.6701784133911133,
|
1702 |
+
"loss": 0.7282,
|
1703 |
+
"odds_ratio_loss": 2.448162078857422,
|
1704 |
+
"rewards/accuracies": 0.8374999761581421,
|
1705 |
+
"rewards/chosen": -0.06959132105112076,
|
1706 |
+
"rewards/margins": 0.29742658138275146,
|
1707 |
+
"rewards/rejected": -0.36701786518096924,
|
1708 |
+
"sft_loss": 0.48338958621025085,
|
1709 |
+
"step": 1000
|
1710 |
+
},
|
1711 |
+
{
|
1712 |
+
"epoch": 0.1063862470320256,
|
1713 |
+
"grad_norm": 3.775995969772339,
|
1714 |
+
"learning_rate": 0.0004999391481442307,
|
1715 |
+
"logits/chosen": -5.038882732391357,
|
1716 |
+
"logits/rejected": -5.039083003997803,
|
1717 |
+
"logps/chosen": -0.6505192518234253,
|
1718 |
+
"logps/rejected": -2.7321646213531494,
|
1719 |
+
"loss": 0.686,
|
1720 |
+
"odds_ratio_loss": 2.5259392261505127,
|
1721 |
+
"rewards/accuracies": 0.8395833373069763,
|
1722 |
+
"rewards/chosen": -0.06505192071199417,
|
1723 |
+
"rewards/margins": 0.2081645280122757,
|
1724 |
+
"rewards/rejected": -0.2732164263725281,
|
1725 |
+
"sft_loss": 0.43344053626060486,
|
1726 |
+
"step": 1010
|
1727 |
+
},
|
1728 |
+
{
|
1729 |
+
"epoch": 0.10743957621056051,
|
1730 |
+
"grad_norm": 3.577996253967285,
|
1731 |
+
"learning_rate": 0.0004999171750763959,
|
1732 |
+
"logits/chosen": -4.925287246704102,
|
1733 |
+
"logits/rejected": -4.925505638122559,
|
1734 |
+
"logps/chosen": -0.6491485238075256,
|
1735 |
+
"logps/rejected": -2.673661947250366,
|
1736 |
+
"loss": 0.683,
|
1737 |
+
"odds_ratio_loss": 2.427180528640747,
|
1738 |
+
"rewards/accuracies": 0.862500011920929,
|
1739 |
+
"rewards/chosen": -0.06491485238075256,
|
1740 |
+
"rewards/margins": 0.20245136320590973,
|
1741 |
+
"rewards/rejected": -0.2673662006855011,
|
1742 |
+
"sft_loss": 0.44026196002960205,
|
1743 |
+
"step": 1020
|
1744 |
+
},
|
1745 |
+
{
|
1746 |
+
"epoch": 0.10849290538909541,
|
1747 |
+
"grad_norm": 4.37986946105957,
|
1748 |
+
"learning_rate": 0.0004998918223366173,
|
1749 |
+
"logits/chosen": -5.010101318359375,
|
1750 |
+
"logits/rejected": -5.010295391082764,
|
1751 |
+
"logps/chosen": -0.7345671057701111,
|
1752 |
+
"logps/rejected": -2.6488146781921387,
|
1753 |
+
"loss": 0.7723,
|
1754 |
+
"odds_ratio_loss": 2.661365509033203,
|
1755 |
+
"rewards/accuracies": 0.8291666507720947,
|
1756 |
+
"rewards/chosen": -0.07345671951770782,
|
1757 |
+
"rewards/margins": 0.19142475724220276,
|
1758 |
+
"rewards/rejected": -0.2648814618587494,
|
1759 |
+
"sft_loss": 0.5061719417572021,
|
1760 |
+
"step": 1030
|
1761 |
+
},
|
1762 |
+
{
|
1763 |
+
"epoch": 0.10954623456763031,
|
1764 |
+
"grad_norm": 8.366073608398438,
|
1765 |
+
"learning_rate": 0.0004998630902677444,
|
1766 |
+
"logits/chosen": -5.033807277679443,
|
1767 |
+
"logits/rejected": -5.034041881561279,
|
1768 |
+
"logps/chosen": -0.7631211876869202,
|
1769 |
+
"logps/rejected": -2.8217647075653076,
|
1770 |
+
"loss": 0.8007,
|
1771 |
+
"odds_ratio_loss": 2.807353973388672,
|
1772 |
+
"rewards/accuracies": 0.84375,
|
1773 |
+
"rewards/chosen": -0.0763121098279953,
|
1774 |
+
"rewards/margins": 0.2058643400669098,
|
1775 |
+
"rewards/rejected": -0.2821764647960663,
|
1776 |
+
"sft_loss": 0.5199962854385376,
|
1777 |
+
"step": 1040
|
1778 |
+
},
|
1779 |
+
{
|
1780 |
+
"epoch": 0.11059956374616522,
|
1781 |
+
"grad_norm": 3.063091993331909,
|
1782 |
+
"learning_rate": 0.0004998309792583257,
|
1783 |
+
"logits/chosen": -5.056707859039307,
|
1784 |
+
"logits/rejected": -5.056928634643555,
|
1785 |
+
"logps/chosen": -0.7930384278297424,
|
1786 |
+
"logps/rejected": -3.048496723175049,
|
1787 |
+
"loss": 0.8372,
|
1788 |
+
"odds_ratio_loss": 2.895695209503174,
|
1789 |
+
"rewards/accuracies": 0.7749999761581421,
|
1790 |
+
"rewards/chosen": -0.07930383831262589,
|
1791 |
+
"rewards/margins": 0.22554583847522736,
|
1792 |
+
"rewards/rejected": -0.30484965443611145,
|
1793 |
+
"sft_loss": 0.5476340651512146,
|
1794 |
+
"step": 1050
|
1795 |
+
},
|
1796 |
+
{
|
1797 |
+
"epoch": 0.11165289292470013,
|
1798 |
+
"grad_norm": 4.035704612731934,
|
1799 |
+
"learning_rate": 0.0004997954897426039,
|
1800 |
+
"logits/chosen": -4.731100559234619,
|
1801 |
+
"logits/rejected": -4.731292247772217,
|
1802 |
+
"logps/chosen": -0.7024089694023132,
|
1803 |
+
"logps/rejected": -3.2537832260131836,
|
1804 |
+
"loss": 0.7407,
|
1805 |
+
"odds_ratio_loss": 2.768791675567627,
|
1806 |
+
"rewards/accuracies": 0.8187500238418579,
|
1807 |
+
"rewards/chosen": -0.07024088501930237,
|
1808 |
+
"rewards/margins": 0.25513747334480286,
|
1809 |
+
"rewards/rejected": -0.3253783583641052,
|
1810 |
+
"sft_loss": 0.46386364102363586,
|
1811 |
+
"step": 1060
|
1812 |
+
},
|
1813 |
+
{
|
1814 |
+
"epoch": 0.11270622210323504,
|
1815 |
+
"grad_norm": 5.625176906585693,
|
1816 |
+
"learning_rate": 0.0004997566222005095,
|
1817 |
+
"logits/chosen": -5.09245491027832,
|
1818 |
+
"logits/rejected": -5.0925984382629395,
|
1819 |
+
"logps/chosen": -0.7186325192451477,
|
1820 |
+
"logps/rejected": -3.2234888076782227,
|
1821 |
+
"loss": 0.7539,
|
1822 |
+
"odds_ratio_loss": 2.5938258171081543,
|
1823 |
+
"rewards/accuracies": 0.856249988079071,
|
1824 |
+
"rewards/chosen": -0.07186325639486313,
|
1825 |
+
"rewards/margins": 0.2504856288433075,
|
1826 |
+
"rewards/rejected": -0.3223489224910736,
|
1827 |
+
"sft_loss": 0.4945569932460785,
|
1828 |
+
"step": 1070
|
1829 |
+
},
|
1830 |
+
{
|
1831 |
+
"epoch": 0.11375955128176994,
|
1832 |
+
"grad_norm": 8.753211975097656,
|
1833 |
+
"learning_rate": 0.0004997143771576551,
|
1834 |
+
"logits/chosen": -5.339606761932373,
|
1835 |
+
"logits/rejected": -5.339755058288574,
|
1836 |
+
"logps/chosen": -0.6703072190284729,
|
1837 |
+
"logps/rejected": -3.089592933654785,
|
1838 |
+
"loss": 0.706,
|
1839 |
+
"odds_ratio_loss": 2.5606048107147217,
|
1840 |
+
"rewards/accuracies": 0.8208333253860474,
|
1841 |
+
"rewards/chosen": -0.06703073531389236,
|
1842 |
+
"rewards/margins": 0.2419285774230957,
|
1843 |
+
"rewards/rejected": -0.30895933508872986,
|
1844 |
+
"sft_loss": 0.44997820258140564,
|
1845 |
+
"step": 1080
|
1846 |
+
},
|
1847 |
+
{
|
1848 |
+
"epoch": 0.11481288046030486,
|
1849 |
+
"grad_norm": 7.652149677276611,
|
1850 |
+
"learning_rate": 0.0004996687551853271,
|
1851 |
+
"logits/chosen": -5.191481113433838,
|
1852 |
+
"logits/rejected": -5.191573619842529,
|
1853 |
+
"logps/chosen": -0.7030736207962036,
|
1854 |
+
"logps/rejected": -3.310757875442505,
|
1855 |
+
"loss": 0.7353,
|
1856 |
+
"odds_ratio_loss": 2.5654568672180176,
|
1857 |
+
"rewards/accuracies": 0.8645833134651184,
|
1858 |
+
"rewards/chosen": -0.07030736654996872,
|
1859 |
+
"rewards/margins": 0.26076844334602356,
|
1860 |
+
"rewards/rejected": -0.3310757875442505,
|
1861 |
+
"sft_loss": 0.47873547673225403,
|
1862 |
+
"step": 1090
|
1863 |
+
},
|
1864 |
+
{
|
1865 |
+
"epoch": 0.11586620963883976,
|
1866 |
+
"grad_norm": 4.607894420623779,
|
1867 |
+
"learning_rate": 0.0004996197569004794,
|
1868 |
+
"logits/chosen": -4.798411846160889,
|
1869 |
+
"logits/rejected": -4.798523426055908,
|
1870 |
+
"logps/chosen": -0.6611460447311401,
|
1871 |
+
"logps/rejected": -2.983532428741455,
|
1872 |
+
"loss": 0.6988,
|
1873 |
+
"odds_ratio_loss": 2.426917314529419,
|
1874 |
+
"rewards/accuracies": 0.824999988079071,
|
1875 |
+
"rewards/chosen": -0.06611461192369461,
|
1876 |
+
"rewards/margins": 0.23223866522312164,
|
1877 |
+
"rewards/rejected": -0.29835325479507446,
|
1878 |
+
"sft_loss": 0.4561263918876648,
|
1879 |
+
"step": 1100
|
1880 |
+
},
|
1881 |
+
{
|
1882 |
+
"epoch": 0.11691953881737467,
|
1883 |
+
"grad_norm": 6.003838539123535,
|
1884 |
+
"learning_rate": 0.000499567382965724,
|
1885 |
+
"logits/chosen": -4.689076900482178,
|
1886 |
+
"logits/rejected": -4.689194202423096,
|
1887 |
+
"logps/chosen": -0.6927405595779419,
|
1888 |
+
"logps/rejected": -3.072282552719116,
|
1889 |
+
"loss": 0.7266,
|
1890 |
+
"odds_ratio_loss": 2.494777202606201,
|
1891 |
+
"rewards/accuracies": 0.8520833253860474,
|
1892 |
+
"rewards/chosen": -0.06927405297756195,
|
1893 |
+
"rewards/margins": 0.23795419931411743,
|
1894 |
+
"rewards/rejected": -0.3072282373905182,
|
1895 |
+
"sft_loss": 0.477167546749115,
|
1896 |
+
"step": 1110
|
1897 |
+
},
|
1898 |
+
{
|
1899 |
+
"epoch": 0.11797286799590957,
|
1900 |
+
"grad_norm": 3.377394676208496,
|
1901 |
+
"learning_rate": 0.0004995116340893223,
|
1902 |
+
"logits/chosen": -4.666645526885986,
|
1903 |
+
"logits/rejected": -4.666871547698975,
|
1904 |
+
"logps/chosen": -0.6164705157279968,
|
1905 |
+
"logps/rejected": -3.2369017601013184,
|
1906 |
+
"loss": 0.6451,
|
1907 |
+
"odds_ratio_loss": 2.411543369293213,
|
1908 |
+
"rewards/accuracies": 0.8645833134651184,
|
1909 |
+
"rewards/chosen": -0.061647046357393265,
|
1910 |
+
"rewards/margins": 0.26204314827919006,
|
1911 |
+
"rewards/rejected": -0.3236902058124542,
|
1912 |
+
"sft_loss": 0.40390655398368835,
|
1913 |
+
"step": 1120
|
1914 |
+
},
|
1915 |
+
{
|
1916 |
+
"epoch": 0.11902619717444447,
|
1917 |
+
"grad_norm": 3.650819778442383,
|
1918 |
+
"learning_rate": 0.0004994525110251759,
|
1919 |
+
"logits/chosen": -5.011782169342041,
|
1920 |
+
"logits/rejected": -5.012125492095947,
|
1921 |
+
"logps/chosen": -0.8005008101463318,
|
1922 |
+
"logps/rejected": -3.1773478984832764,
|
1923 |
+
"loss": 0.8349,
|
1924 |
+
"odds_ratio_loss": 2.8024778366088867,
|
1925 |
+
"rewards/accuracies": 0.8354166746139526,
|
1926 |
+
"rewards/chosen": -0.08005008101463318,
|
1927 |
+
"rewards/margins": 0.2376846969127655,
|
1928 |
+
"rewards/rejected": -0.3177347779273987,
|
1929 |
+
"sft_loss": 0.5546395182609558,
|
1930 |
+
"step": 1130
|
1931 |
+
},
|
1932 |
+
{
|
1933 |
+
"epoch": 0.12007952635297939,
|
1934 |
+
"grad_norm": 5.192956447601318,
|
1935 |
+
"learning_rate": 0.0004993900145728157,
|
1936 |
+
"logits/chosen": -5.2029805183410645,
|
1937 |
+
"logits/rejected": -5.203344821929932,
|
1938 |
+
"logps/chosen": -0.7520886063575745,
|
1939 |
+
"logps/rejected": -3.3132715225219727,
|
1940 |
+
"loss": 0.7852,
|
1941 |
+
"odds_ratio_loss": 2.5903329849243164,
|
1942 |
+
"rewards/accuracies": 0.84375,
|
1943 |
+
"rewards/chosen": -0.07520885765552521,
|
1944 |
+
"rewards/margins": 0.2561182677745819,
|
1945 |
+
"rewards/rejected": -0.3313271403312683,
|
1946 |
+
"sft_loss": 0.5261538624763489,
|
1947 |
+
"step": 1140
|
1948 |
+
},
|
1949 |
+
{
|
1950 |
+
"epoch": 0.12113285553151429,
|
1951 |
+
"grad_norm": 7.4040985107421875,
|
1952 |
+
"learning_rate": 0.0004993241455773918,
|
1953 |
+
"logits/chosen": -5.165520191192627,
|
1954 |
+
"logits/rejected": -5.165550708770752,
|
1955 |
+
"logps/chosen": -0.7126467227935791,
|
1956 |
+
"logps/rejected": -3.5545897483825684,
|
1957 |
+
"loss": 0.748,
|
1958 |
+
"odds_ratio_loss": 2.594726800918579,
|
1959 |
+
"rewards/accuracies": 0.8354166746139526,
|
1960 |
+
"rewards/chosen": -0.07126467674970627,
|
1961 |
+
"rewards/margins": 0.28419435024261475,
|
1962 |
+
"rewards/rejected": -0.3554590046405792,
|
1963 |
+
"sft_loss": 0.48852840065956116,
|
1964 |
+
"step": 1150
|
1965 |
+
},
|
1966 |
+
{
|
1967 |
+
"epoch": 0.1221861847100492,
|
1968 |
+
"grad_norm": 4.905418872833252,
|
1969 |
+
"learning_rate": 0.0004992549049296619,
|
1970 |
+
"logits/chosen": -5.190316200256348,
|
1971 |
+
"logits/rejected": -5.190418720245361,
|
1972 |
+
"logps/chosen": -0.6702563762664795,
|
1973 |
+
"logps/rejected": -3.4835519790649414,
|
1974 |
+
"loss": 0.7052,
|
1975 |
+
"odds_ratio_loss": 2.497361183166504,
|
1976 |
+
"rewards/accuracies": 0.8500000238418579,
|
1977 |
+
"rewards/chosen": -0.06702563911676407,
|
1978 |
+
"rewards/margins": 0.28132954239845276,
|
1979 |
+
"rewards/rejected": -0.3483552038669586,
|
1980 |
+
"sft_loss": 0.4554961621761322,
|
1981 |
+
"step": 1160
|
1982 |
+
},
|
1983 |
+
{
|
1984 |
+
"epoch": 0.1232395138885841,
|
1985 |
+
"grad_norm": 4.1152238845825195,
|
1986 |
+
"learning_rate": 0.0004991822935659786,
|
1987 |
+
"logits/chosen": -5.477373123168945,
|
1988 |
+
"logits/rejected": -5.477328300476074,
|
1989 |
+
"logps/chosen": -0.8472169637680054,
|
1990 |
+
"logps/rejected": -3.917269229888916,
|
1991 |
+
"loss": 0.8817,
|
1992 |
+
"odds_ratio_loss": 3.221430540084839,
|
1993 |
+
"rewards/accuracies": 0.8416666388511658,
|
1994 |
+
"rewards/chosen": -0.08472169190645218,
|
1995 |
+
"rewards/margins": 0.30700525641441345,
|
1996 |
+
"rewards/rejected": -0.39172691106796265,
|
1997 |
+
"sft_loss": 0.5595788955688477,
|
1998 |
+
"step": 1170
|
1999 |
+
},
|
2000 |
+
{
|
2001 |
+
"epoch": 0.12429284306711902,
|
2002 |
+
"grad_norm": 5.008730888366699,
|
2003 |
+
"learning_rate": 0.0004991063124682778,
|
2004 |
+
"logits/chosen": -5.323733329772949,
|
2005 |
+
"logits/rejected": -5.323288917541504,
|
2006 |
+
"logps/chosen": -0.757644534111023,
|
2007 |
+
"logps/rejected": -5.853792190551758,
|
2008 |
+
"loss": 0.7925,
|
2009 |
+
"odds_ratio_loss": 2.670191526412964,
|
2010 |
+
"rewards/accuracies": 0.8500000238418579,
|
2011 |
+
"rewards/chosen": -0.07576445490121841,
|
2012 |
+
"rewards/margins": 0.5096147656440735,
|
2013 |
+
"rewards/rejected": -0.5853793025016785,
|
2014 |
+
"sft_loss": 0.5255211591720581,
|
2015 |
+
"step": 1180
|
2016 |
+
},
|
2017 |
+
{
|
2018 |
+
"epoch": 0.12534617224565392,
|
2019 |
+
"grad_norm": 31.133501052856445,
|
2020 |
+
"learning_rate": 0.0004990269626640645,
|
2021 |
+
"logits/chosen": -5.64047384262085,
|
2022 |
+
"logits/rejected": -5.64005184173584,
|
2023 |
+
"logps/chosen": -0.768204391002655,
|
2024 |
+
"logps/rejected": -5.164267063140869,
|
2025 |
+
"loss": 0.8058,
|
2026 |
+
"odds_ratio_loss": 2.815326452255249,
|
2027 |
+
"rewards/accuracies": 0.8354166746139526,
|
2028 |
+
"rewards/chosen": -0.07682044059038162,
|
2029 |
+
"rewards/margins": 0.43960627913475037,
|
2030 |
+
"rewards/rejected": -0.516426682472229,
|
2031 |
+
"sft_loss": 0.5242764949798584,
|
2032 |
+
"step": 1190
|
2033 |
+
},
|
2034 |
+
{
|
2035 |
+
"epoch": 0.12639950142418882,
|
2036 |
+
"grad_norm": 6.6835222244262695,
|
2037 |
+
"learning_rate": 0.0004989442452263996,
|
2038 |
+
"logits/chosen": -5.151331424713135,
|
2039 |
+
"logits/rejected": -5.1510162353515625,
|
2040 |
+
"logps/chosen": -0.8430954217910767,
|
2041 |
+
"logps/rejected": -4.105188369750977,
|
2042 |
+
"loss": 0.8846,
|
2043 |
+
"odds_ratio_loss": 3.0806498527526855,
|
2044 |
+
"rewards/accuracies": 0.7749999761581421,
|
2045 |
+
"rewards/chosen": -0.08430954068899155,
|
2046 |
+
"rewards/margins": 0.32620927691459656,
|
2047 |
+
"rewards/rejected": -0.4105188250541687,
|
2048 |
+
"sft_loss": 0.5764933228492737,
|
2049 |
+
"step": 1200
|
2050 |
+
},
|
2051 |
+
{
|
2052 |
+
"epoch": 0.12745283060272372,
|
2053 |
+
"grad_norm": 6.060980796813965,
|
2054 |
+
"learning_rate": 0.0004988581612738847,
|
2055 |
+
"logits/chosen": -5.232106685638428,
|
2056 |
+
"logits/rejected": -5.2317962646484375,
|
2057 |
+
"logps/chosen": -0.7580560445785522,
|
2058 |
+
"logps/rejected": -3.9481935501098633,
|
2059 |
+
"loss": 0.7955,
|
2060 |
+
"odds_ratio_loss": 2.8234329223632812,
|
2061 |
+
"rewards/accuracies": 0.8187500238418579,
|
2062 |
+
"rewards/chosen": -0.07580561190843582,
|
2063 |
+
"rewards/margins": 0.319013774394989,
|
2064 |
+
"rewards/rejected": -0.39481934905052185,
|
2065 |
+
"sft_loss": 0.5131634473800659,
|
2066 |
+
"step": 1210
|
2067 |
+
},
|
2068 |
+
{
|
2069 |
+
"epoch": 0.12850615978125865,
|
2070 |
+
"grad_norm": 4.154183864593506,
|
2071 |
+
"learning_rate": 0.0004987687119706477,
|
2072 |
+
"logits/chosen": -5.385165214538574,
|
2073 |
+
"logits/rejected": -5.385090351104736,
|
2074 |
+
"logps/chosen": -0.7838355302810669,
|
2075 |
+
"logps/rejected": -3.81575608253479,
|
2076 |
+
"loss": 0.8198,
|
2077 |
+
"odds_ratio_loss": 2.7132718563079834,
|
2078 |
+
"rewards/accuracies": 0.8479166626930237,
|
2079 |
+
"rewards/chosen": -0.07838355004787445,
|
2080 |
+
"rewards/margins": 0.30319201946258545,
|
2081 |
+
"rewards/rejected": -0.3815755844116211,
|
2082 |
+
"sft_loss": 0.5484524965286255,
|
2083 |
+
"step": 1220
|
2084 |
+
},
|
2085 |
+
{
|
2086 |
+
"epoch": 0.12955948895979355,
|
2087 |
+
"grad_norm": 4.047881603240967,
|
2088 |
+
"learning_rate": 0.0004986758985263265,
|
2089 |
+
"logits/chosen": -5.267871379852295,
|
2090 |
+
"logits/rejected": -5.267872333526611,
|
2091 |
+
"logps/chosen": -0.7567169666290283,
|
2092 |
+
"logps/rejected": -2.9766650199890137,
|
2093 |
+
"loss": 0.7939,
|
2094 |
+
"odds_ratio_loss": 2.620311975479126,
|
2095 |
+
"rewards/accuracies": 0.8187500238418579,
|
2096 |
+
"rewards/chosen": -0.07567168772220612,
|
2097 |
+
"rewards/margins": 0.2219947874546051,
|
2098 |
+
"rewards/rejected": -0.2976664900779724,
|
2099 |
+
"sft_loss": 0.5318555235862732,
|
2100 |
+
"step": 1230
|
2101 |
+
},
|
2102 |
+
{
|
2103 |
+
"epoch": 0.13061281813832845,
|
2104 |
+
"grad_norm": 2.7552692890167236,
|
2105 |
+
"learning_rate": 0.0004985797221960529,
|
2106 |
+
"logits/chosen": -5.264489650726318,
|
2107 |
+
"logits/rejected": -5.264598369598389,
|
2108 |
+
"logps/chosen": -0.7095692753791809,
|
2109 |
+
"logps/rejected": -3.2434911727905273,
|
2110 |
+
"loss": 0.7445,
|
2111 |
+
"odds_ratio_loss": 2.7399401664733887,
|
2112 |
+
"rewards/accuracies": 0.8458333611488342,
|
2113 |
+
"rewards/chosen": -0.07095693051815033,
|
2114 |
+
"rewards/margins": 0.25339218974113464,
|
2115 |
+
"rewards/rejected": -0.3243491053581238,
|
2116 |
+
"sft_loss": 0.47051993012428284,
|
2117 |
+
"step": 1240
|
2118 |
+
},
|
2119 |
+
{
|
2120 |
+
"epoch": 0.13166614731686335,
|
2121 |
+
"grad_norm": 3.3210740089416504,
|
2122 |
+
"learning_rate": 0.0004984801842804357,
|
2123 |
+
"logits/chosen": -5.190452575683594,
|
2124 |
+
"logits/rejected": -5.190454959869385,
|
2125 |
+
"logps/chosen": -0.6608388423919678,
|
2126 |
+
"logps/rejected": -3.0926926136016846,
|
2127 |
+
"loss": 0.6955,
|
2128 |
+
"odds_ratio_loss": 2.5877368450164795,
|
2129 |
+
"rewards/accuracies": 0.8541666865348816,
|
2130 |
+
"rewards/chosen": -0.0660838857293129,
|
2131 |
+
"rewards/margins": 0.2431853860616684,
|
2132 |
+
"rewards/rejected": -0.3092692792415619,
|
2133 |
+
"sft_loss": 0.43669426441192627,
|
2134 |
+
"step": 1250
|
2135 |
+
},
|
2136 |
+
{
|
2137 |
+
"epoch": 0.13271947649539828,
|
2138 |
+
"grad_norm": 2.113543748855591,
|
2139 |
+
"learning_rate": 0.0004983772861255426,
|
2140 |
+
"logits/chosen": -5.355245113372803,
|
2141 |
+
"logits/rejected": -5.3552327156066895,
|
2142 |
+
"logps/chosen": -0.7177728414535522,
|
2143 |
+
"logps/rejected": -3.0373685359954834,
|
2144 |
+
"loss": 0.7558,
|
2145 |
+
"odds_ratio_loss": 2.9420340061187744,
|
2146 |
+
"rewards/accuracies": 0.8020833134651184,
|
2147 |
+
"rewards/chosen": -0.07177729159593582,
|
2148 |
+
"rewards/margins": 0.23195955157279968,
|
2149 |
+
"rewards/rejected": -0.3037368357181549,
|
2150 |
+
"sft_loss": 0.4615623354911804,
|
2151 |
+
"step": 1260
|
2152 |
+
},
|
2153 |
+
{
|
2154 |
+
"epoch": 0.13377280567393318,
|
2155 |
+
"grad_norm": 2.6125612258911133,
|
2156 |
+
"learning_rate": 0.0004982710291228828,
|
2157 |
+
"logits/chosen": -5.5682549476623535,
|
2158 |
+
"logits/rejected": -5.568284034729004,
|
2159 |
+
"logps/chosen": -0.7032897472381592,
|
2160 |
+
"logps/rejected": -3.1011478900909424,
|
2161 |
+
"loss": 0.7454,
|
2162 |
+
"odds_ratio_loss": 2.6539957523345947,
|
2163 |
+
"rewards/accuracies": 0.793749988079071,
|
2164 |
+
"rewards/chosen": -0.0703289657831192,
|
2165 |
+
"rewards/margins": 0.23978586494922638,
|
2166 |
+
"rewards/rejected": -0.3101148307323456,
|
2167 |
+
"sft_loss": 0.4799610376358032,
|
2168 |
+
"step": 1270
|
2169 |
+
},
|
2170 |
+
{
|
2171 |
+
"epoch": 0.13482613485246808,
|
2172 |
+
"grad_norm": 4.396284580230713,
|
2173 |
+
"learning_rate": 0.0004981614147093875,
|
2174 |
+
"logits/chosen": -5.787298202514648,
|
2175 |
+
"logits/rejected": -5.787331581115723,
|
2176 |
+
"logps/chosen": -0.7411842942237854,
|
2177 |
+
"logps/rejected": -3.3715906143188477,
|
2178 |
+
"loss": 0.7797,
|
2179 |
+
"odds_ratio_loss": 2.705798625946045,
|
2180 |
+
"rewards/accuracies": 0.824999988079071,
|
2181 |
+
"rewards/chosen": -0.07411842048168182,
|
2182 |
+
"rewards/margins": 0.2630406320095062,
|
2183 |
+
"rewards/rejected": -0.33715906739234924,
|
2184 |
+
"sft_loss": 0.5091153979301453,
|
2185 |
+
"step": 1280
|
2186 |
+
},
|
2187 |
+
{
|
2188 |
+
"epoch": 0.13587946403100298,
|
2189 |
+
"grad_norm": 5.317102909088135,
|
2190 |
+
"learning_rate": 0.000498048444367391,
|
2191 |
+
"logits/chosen": -5.471971035003662,
|
2192 |
+
"logits/rejected": -5.472008228302002,
|
2193 |
+
"logps/chosen": -0.7457516193389893,
|
2194 |
+
"logps/rejected": -3.6155009269714355,
|
2195 |
+
"loss": 0.7782,
|
2196 |
+
"odds_ratio_loss": 2.801055908203125,
|
2197 |
+
"rewards/accuracies": 0.8583333492279053,
|
2198 |
+
"rewards/chosen": -0.07457517087459564,
|
2199 |
+
"rewards/margins": 0.2869749367237091,
|
2200 |
+
"rewards/rejected": -0.36155006289482117,
|
2201 |
+
"sft_loss": 0.49809518456459045,
|
2202 |
+
"step": 1290
|
2203 |
+
},
|
2204 |
+
{
|
2205 |
+
"epoch": 0.13693279320953788,
|
2206 |
+
"grad_norm": 4.3287506103515625,
|
2207 |
+
"learning_rate": 0.00049793211962461,
|
2208 |
+
"logits/chosen": -5.262281894683838,
|
2209 |
+
"logits/rejected": -5.262419700622559,
|
2210 |
+
"logps/chosen": -0.7028716802597046,
|
2211 |
+
"logps/rejected": -2.8387629985809326,
|
2212 |
+
"loss": 0.7409,
|
2213 |
+
"odds_ratio_loss": 2.748593807220459,
|
2214 |
+
"rewards/accuracies": 0.8208333253860474,
|
2215 |
+
"rewards/chosen": -0.07028716057538986,
|
2216 |
+
"rewards/margins": 0.2135891616344452,
|
2217 |
+
"rewards/rejected": -0.28387632966041565,
|
2218 |
+
"sft_loss": 0.4660036265850067,
|
2219 |
+
"step": 1300
|
2220 |
+
},
|
2221 |
+
{
|
2222 |
+
"epoch": 0.1379861223880728,
|
2223 |
+
"grad_norm": 1.7063987255096436,
|
2224 |
+
"learning_rate": 0.0004978124420541238,
|
2225 |
+
"logits/chosen": -5.180874347686768,
|
2226 |
+
"logits/rejected": -5.181042671203613,
|
2227 |
+
"logps/chosen": -0.7432348132133484,
|
2228 |
+
"logps/rejected": -2.8490519523620605,
|
2229 |
+
"loss": 0.7859,
|
2230 |
+
"odds_ratio_loss": 2.679919958114624,
|
2231 |
+
"rewards/accuracies": 0.8229166865348816,
|
2232 |
+
"rewards/chosen": -0.07432348281145096,
|
2233 |
+
"rewards/margins": 0.2105817198753357,
|
2234 |
+
"rewards/rejected": -0.28490516543388367,
|
2235 |
+
"sft_loss": 0.5179334282875061,
|
2236 |
+
"step": 1310
|
2237 |
+
},
|
2238 |
+
{
|
2239 |
+
"epoch": 0.1390394515666077,
|
2240 |
+
"grad_norm": 2.908730983734131,
|
2241 |
+
"learning_rate": 0.0004976894132743521,
|
2242 |
+
"logits/chosen": -5.495538234710693,
|
2243 |
+
"logits/rejected": -5.49543571472168,
|
2244 |
+
"logps/chosen": -0.696144700050354,
|
2245 |
+
"logps/rejected": -3.018319845199585,
|
2246 |
+
"loss": 0.7357,
|
2247 |
+
"odds_ratio_loss": 3.030001163482666,
|
2248 |
+
"rewards/accuracies": 0.8062499761581421,
|
2249 |
+
"rewards/chosen": -0.0696144625544548,
|
2250 |
+
"rewards/margins": 0.23221756517887115,
|
2251 |
+
"rewards/rejected": -0.30183205008506775,
|
2252 |
+
"sft_loss": 0.4326884150505066,
|
2253 |
+
"step": 1320
|
2254 |
+
},
|
2255 |
+
{
|
2256 |
+
"epoch": 0.14009278074514261,
|
2257 |
+
"grad_norm": 3.4632487297058105,
|
2258 |
+
"learning_rate": 0.0004975630349490338,
|
2259 |
+
"logits/chosen": -5.525754928588867,
|
2260 |
+
"logits/rejected": -5.52562141418457,
|
2261 |
+
"logps/chosen": -0.7210444808006287,
|
2262 |
+
"logps/rejected": -3.5407607555389404,
|
2263 |
+
"loss": 0.7551,
|
2264 |
+
"odds_ratio_loss": 2.435321092605591,
|
2265 |
+
"rewards/accuracies": 0.8229166865348816,
|
2266 |
+
"rewards/chosen": -0.07210444658994675,
|
2267 |
+
"rewards/margins": 0.28197160363197327,
|
2268 |
+
"rewards/rejected": -0.3540760576725006,
|
2269 |
+
"sft_loss": 0.5115490555763245,
|
2270 |
+
"step": 1330
|
2271 |
+
},
|
2272 |
+
{
|
2273 |
+
"epoch": 0.14114610992367752,
|
2274 |
+
"grad_norm": 6.958703517913818,
|
2275 |
+
"learning_rate": 0.0004974333087872041,
|
2276 |
+
"logits/chosen": -5.362403869628906,
|
2277 |
+
"logits/rejected": -5.362163066864014,
|
2278 |
+
"logps/chosen": -0.732792854309082,
|
2279 |
+
"logps/rejected": -4.104527950286865,
|
2280 |
+
"loss": 0.7646,
|
2281 |
+
"odds_ratio_loss": 2.719597339630127,
|
2282 |
+
"rewards/accuracies": 0.8479166626930237,
|
2283 |
+
"rewards/chosen": -0.07327928394079208,
|
2284 |
+
"rewards/margins": 0.3371734619140625,
|
2285 |
+
"rewards/rejected": -0.4104527533054352,
|
2286 |
+
"sft_loss": 0.4926711320877075,
|
2287 |
+
"step": 1340
|
2288 |
+
},
|
2289 |
+
{
|
2290 |
+
"epoch": 0.14219943910221244,
|
2291 |
+
"grad_norm": 2.859614849090576,
|
2292 |
+
"learning_rate": 0.0004973002365431719,
|
2293 |
+
"logits/chosen": -5.708899021148682,
|
2294 |
+
"logits/rejected": -5.708664894104004,
|
2295 |
+
"logps/chosen": -0.6528833508491516,
|
2296 |
+
"logps/rejected": -4.244791030883789,
|
2297 |
+
"loss": 0.6818,
|
2298 |
+
"odds_ratio_loss": 2.3199055194854736,
|
2299 |
+
"rewards/accuracies": 0.8770833611488342,
|
2300 |
+
"rewards/chosen": -0.06528832763433456,
|
2301 |
+
"rewards/margins": 0.35919085144996643,
|
2302 |
+
"rewards/rejected": -0.4244791567325592,
|
2303 |
+
"sft_loss": 0.44977909326553345,
|
2304 |
+
"step": 1350
|
2305 |
+
},
|
2306 |
+
{
|
2307 |
+
"epoch": 0.14325276828074734,
|
2308 |
+
"grad_norm": 4.355047702789307,
|
2309 |
+
"learning_rate": 0.0004971638200164954,
|
2310 |
+
"logits/chosen": -6.141923427581787,
|
2311 |
+
"logits/rejected": -6.141890525817871,
|
2312 |
+
"logps/chosen": -0.7438533902168274,
|
2313 |
+
"logps/rejected": -3.756343126296997,
|
2314 |
+
"loss": 0.7845,
|
2315 |
+
"odds_ratio_loss": 2.7992961406707764,
|
2316 |
+
"rewards/accuracies": 0.824999988079071,
|
2317 |
+
"rewards/chosen": -0.07438533753156662,
|
2318 |
+
"rewards/margins": 0.3012489676475525,
|
2319 |
+
"rewards/rejected": -0.3756342828273773,
|
2320 |
+
"sft_loss": 0.5046018362045288,
|
2321 |
+
"step": 1360
|
2322 |
+
},
|
2323 |
+
{
|
2324 |
+
"epoch": 0.14430609745928225,
|
2325 |
+
"grad_norm": 2.1691360473632812,
|
2326 |
+
"learning_rate": 0.0004970240610519582,
|
2327 |
+
"logits/chosen": -5.513346195220947,
|
2328 |
+
"logits/rejected": -5.513165473937988,
|
2329 |
+
"logps/chosen": -0.7122442722320557,
|
2330 |
+
"logps/rejected": -4.005341529846191,
|
2331 |
+
"loss": 0.7452,
|
2332 |
+
"odds_ratio_loss": 2.3780596256256104,
|
2333 |
+
"rewards/accuracies": 0.8500000238418579,
|
2334 |
+
"rewards/chosen": -0.07122442126274109,
|
2335 |
+
"rewards/margins": 0.32930976152420044,
|
2336 |
+
"rewards/rejected": -0.40053418278694153,
|
2337 |
+
"sft_loss": 0.5073560476303101,
|
2338 |
+
"step": 1370
|
2339 |
+
},
|
2340 |
+
{
|
2341 |
+
"epoch": 0.14535942663781715,
|
2342 |
+
"grad_norm": 8.108336448669434,
|
2343 |
+
"learning_rate": 0.0004968809615395443,
|
2344 |
+
"logits/chosen": -5.834110736846924,
|
2345 |
+
"logits/rejected": -5.834160327911377,
|
2346 |
+
"logps/chosen": -0.7287462949752808,
|
2347 |
+
"logps/rejected": -3.322920560836792,
|
2348 |
+
"loss": 0.7683,
|
2349 |
+
"odds_ratio_loss": 2.7808871269226074,
|
2350 |
+
"rewards/accuracies": 0.824999988079071,
|
2351 |
+
"rewards/chosen": -0.07287462800741196,
|
2352 |
+
"rewards/margins": 0.25941744446754456,
|
2353 |
+
"rewards/rejected": -0.3322920799255371,
|
2354 |
+
"sft_loss": 0.4901922941207886,
|
2355 |
+
"step": 1380
|
2356 |
+
},
|
2357 |
+
{
|
2358 |
+
"epoch": 0.14641275581635205,
|
2359 |
+
"grad_norm": 4.8621392250061035,
|
2360 |
+
"learning_rate": 0.0004967345234144125,
|
2361 |
+
"logits/chosen": -5.492813587188721,
|
2362 |
+
"logits/rejected": -5.492822647094727,
|
2363 |
+
"logps/chosen": -0.6987211108207703,
|
2364 |
+
"logps/rejected": -3.542156934738159,
|
2365 |
+
"loss": 0.7353,
|
2366 |
+
"odds_ratio_loss": 2.4634013175964355,
|
2367 |
+
"rewards/accuracies": 0.8333333134651184,
|
2368 |
+
"rewards/chosen": -0.06987211108207703,
|
2369 |
+
"rewards/margins": 0.2843436002731323,
|
2370 |
+
"rewards/rejected": -0.35421568155288696,
|
2371 |
+
"sft_loss": 0.48899564146995544,
|
2372 |
+
"step": 1390
|
2373 |
+
},
|
2374 |
+
{
|
2375 |
+
"epoch": 0.14746608499488698,
|
2376 |
+
"grad_norm": 4.177161693572998,
|
2377 |
+
"learning_rate": 0.00049658474865687,
|
2378 |
+
"logits/chosen": -5.457418441772461,
|
2379 |
+
"logits/rejected": -5.457381248474121,
|
2380 |
+
"logps/chosen": -0.6720997095108032,
|
2381 |
+
"logps/rejected": -3.794857978820801,
|
2382 |
+
"loss": 0.705,
|
2383 |
+
"odds_ratio_loss": 2.515441417694092,
|
2384 |
+
"rewards/accuracies": 0.8520833253860474,
|
2385 |
+
"rewards/chosen": -0.06720996648073196,
|
2386 |
+
"rewards/margins": 0.31227582693099976,
|
2387 |
+
"rewards/rejected": -0.3794857859611511,
|
2388 |
+
"sft_loss": 0.4534277021884918,
|
2389 |
+
"step": 1400
|
2390 |
+
},
|
2391 |
+
{
|
2392 |
+
"epoch": 0.14851941417342188,
|
2393 |
+
"grad_norm": 6.939654350280762,
|
2394 |
+
"learning_rate": 0.000496431639292346,
|
2395 |
+
"logits/chosen": -5.591572284698486,
|
2396 |
+
"logits/rejected": -5.591520309448242,
|
2397 |
+
"logps/chosen": -0.6898292899131775,
|
2398 |
+
"logps/rejected": -3.8443169593811035,
|
2399 |
+
"loss": 0.7205,
|
2400 |
+
"odds_ratio_loss": 2.573276996612549,
|
2401 |
+
"rewards/accuracies": 0.8583333492279053,
|
2402 |
+
"rewards/chosen": -0.06898292899131775,
|
2403 |
+
"rewards/margins": 0.3154487609863281,
|
2404 |
+
"rewards/rejected": -0.3844316899776459,
|
2405 |
+
"sft_loss": 0.46317487955093384,
|
2406 |
+
"step": 1410
|
2407 |
+
},
|
2408 |
+
{
|
2409 |
+
"epoch": 0.14957274335195678,
|
2410 |
+
"grad_norm": 3.3419246673583984,
|
2411 |
+
"learning_rate": 0.0004962751973913644,
|
2412 |
+
"logits/chosen": -5.660191059112549,
|
2413 |
+
"logits/rejected": -5.6601338386535645,
|
2414 |
+
"logps/chosen": -0.7214117050170898,
|
2415 |
+
"logps/rejected": -3.800171375274658,
|
2416 |
+
"loss": 0.7539,
|
2417 |
+
"odds_ratio_loss": 2.2002346515655518,
|
2418 |
+
"rewards/accuracies": 0.8354166746139526,
|
2419 |
+
"rewards/chosen": -0.07214117050170898,
|
2420 |
+
"rewards/margins": 0.30787599086761475,
|
2421 |
+
"rewards/rejected": -0.38001713156700134,
|
2422 |
+
"sft_loss": 0.5338917374610901,
|
2423 |
+
"step": 1420
|
2424 |
+
},
|
2425 |
+
{
|
2426 |
+
"epoch": 0.15062607253049168,
|
2427 |
+
"grad_norm": 6.627447128295898,
|
2428 |
+
"learning_rate": 0.0004961154250695152,
|
2429 |
+
"logits/chosen": -5.646604537963867,
|
2430 |
+
"logits/rejected": -5.646506309509277,
|
2431 |
+
"logps/chosen": -0.6789618730545044,
|
2432 |
+
"logps/rejected": -3.7556862831115723,
|
2433 |
+
"loss": 0.7131,
|
2434 |
+
"odds_ratio_loss": 2.4358396530151367,
|
2435 |
+
"rewards/accuracies": 0.8395833373069763,
|
2436 |
+
"rewards/chosen": -0.06789619475603104,
|
2437 |
+
"rewards/margins": 0.3076724112033844,
|
2438 |
+
"rewards/rejected": -0.3755686581134796,
|
2439 |
+
"sft_loss": 0.46949687600135803,
|
2440 |
+
"step": 1430
|
2441 |
+
},
|
2442 |
+
{
|
2443 |
+
"epoch": 0.15167940170902658,
|
2444 |
+
"grad_norm": 7.264041900634766,
|
2445 |
+
"learning_rate": 0.0004959523244874262,
|
2446 |
+
"logits/chosen": -5.661590576171875,
|
2447 |
+
"logits/rejected": -5.661508560180664,
|
2448 |
+
"logps/chosen": -0.6989915370941162,
|
2449 |
+
"logps/rejected": -3.6030194759368896,
|
2450 |
+
"loss": 0.7324,
|
2451 |
+
"odds_ratio_loss": 2.595428943634033,
|
2452 |
+
"rewards/accuracies": 0.8458333611488342,
|
2453 |
+
"rewards/chosen": -0.06989916414022446,
|
2454 |
+
"rewards/margins": 0.2904028296470642,
|
2455 |
+
"rewards/rejected": -0.3603019714355469,
|
2456 |
+
"sft_loss": 0.4728315472602844,
|
2457 |
+
"step": 1440
|
2458 |
+
},
|
2459 |
+
{
|
2460 |
+
"epoch": 0.1527327308875615,
|
2461 |
+
"grad_norm": 3.4570446014404297,
|
2462 |
+
"learning_rate": 0.0004957858978507342,
|
2463 |
+
"logits/chosen": -5.628535270690918,
|
2464 |
+
"logits/rejected": -5.628504276275635,
|
2465 |
+
"logps/chosen": -0.6590794324874878,
|
2466 |
+
"logps/rejected": -3.3306422233581543,
|
2467 |
+
"loss": 0.6931,
|
2468 |
+
"odds_ratio_loss": 2.6064822673797607,
|
2469 |
+
"rewards/accuracies": 0.8416666388511658,
|
2470 |
+
"rewards/chosen": -0.06590793281793594,
|
2471 |
+
"rewards/margins": 0.2671562433242798,
|
2472 |
+
"rewards/rejected": -0.3330641984939575,
|
2473 |
+
"sft_loss": 0.43240654468536377,
|
2474 |
+
"step": 1450
|
2475 |
+
},
|
2476 |
+
{
|
2477 |
+
"epoch": 0.1537860600660964,
|
2478 |
+
"grad_norm": 7.473481178283691,
|
2479 |
+
"learning_rate": 0.0004956161474100544,
|
2480 |
+
"logits/chosen": -5.7138261795043945,
|
2481 |
+
"logits/rejected": -5.713827133178711,
|
2482 |
+
"logps/chosen": -0.6599766612052917,
|
2483 |
+
"logps/rejected": -3.381110668182373,
|
2484 |
+
"loss": 0.6939,
|
2485 |
+
"odds_ratio_loss": 2.4460840225219727,
|
2486 |
+
"rewards/accuracies": 0.84375,
|
2487 |
+
"rewards/chosen": -0.06599767506122589,
|
2488 |
+
"rewards/margins": 0.2721133828163147,
|
2489 |
+
"rewards/rejected": -0.3381110727787018,
|
2490 |
+
"sft_loss": 0.44932422041893005,
|
2491 |
+
"step": 1460
|
2492 |
+
},
|
2493 |
+
{
|
2494 |
+
"epoch": 0.1548393892446313,
|
2495 |
+
"grad_norm": 6.867354869842529,
|
2496 |
+
"learning_rate": 0.0004954430754609506,
|
2497 |
+
"logits/chosen": -5.79508638381958,
|
2498 |
+
"logits/rejected": -5.795089244842529,
|
2499 |
+
"logps/chosen": -0.6903258562088013,
|
2500 |
+
"logps/rejected": -3.085076332092285,
|
2501 |
+
"loss": 0.7338,
|
2502 |
+
"odds_ratio_loss": 2.651606798171997,
|
2503 |
+
"rewards/accuracies": 0.8041666746139526,
|
2504 |
+
"rewards/chosen": -0.06903257966041565,
|
2505 |
+
"rewards/margins": 0.2394750565290451,
|
2506 |
+
"rewards/rejected": -0.30850762128829956,
|
2507 |
+
"sft_loss": 0.4686751961708069,
|
2508 |
+
"step": 1470
|
2509 |
+
},
|
2510 |
+
{
|
2511 |
+
"epoch": 0.1558927184231662,
|
2512 |
+
"grad_norm": 3.0605275630950928,
|
2513 |
+
"learning_rate": 0.0004952666843439038,
|
2514 |
+
"logits/chosen": -5.6379008293151855,
|
2515 |
+
"logits/rejected": -5.6378703117370605,
|
2516 |
+
"logps/chosen": -0.6344018578529358,
|
2517 |
+
"logps/rejected": -3.5333213806152344,
|
2518 |
+
"loss": 0.6667,
|
2519 |
+
"odds_ratio_loss": 2.371415853500366,
|
2520 |
+
"rewards/accuracies": 0.8354166746139526,
|
2521 |
+
"rewards/chosen": -0.06344018131494522,
|
2522 |
+
"rewards/margins": 0.2898919880390167,
|
2523 |
+
"rewards/rejected": -0.35333216190338135,
|
2524 |
+
"sft_loss": 0.42956602573394775,
|
2525 |
+
"step": 1480
|
2526 |
+
},
|
2527 |
+
{
|
2528 |
+
"epoch": 0.15694604760170114,
|
2529 |
+
"grad_norm": 3.5998635292053223,
|
2530 |
+
"learning_rate": 0.0004950869764442807,
|
2531 |
+
"logits/chosen": -5.513609886169434,
|
2532 |
+
"logits/rejected": -5.513594627380371,
|
2533 |
+
"logps/chosen": -0.6546897888183594,
|
2534 |
+
"logps/rejected": -3.4388887882232666,
|
2535 |
+
"loss": 0.6841,
|
2536 |
+
"odds_ratio_loss": 2.4476258754730225,
|
2537 |
+
"rewards/accuracies": 0.8854166865348816,
|
2538 |
+
"rewards/chosen": -0.06546898186206818,
|
2539 |
+
"rewards/margins": 0.2784199118614197,
|
2540 |
+
"rewards/rejected": -0.34388887882232666,
|
2541 |
+
"sft_loss": 0.43934836983680725,
|
2542 |
+
"step": 1490
|
2543 |
+
},
|
2544 |
+
{
|
2545 |
+
"epoch": 0.15799937678023604,
|
2546 |
+
"grad_norm": 3.7529213428497314,
|
2547 |
+
"learning_rate": 0.0004949039541923015,
|
2548 |
+
"logits/chosen": -5.581011772155762,
|
2549 |
+
"logits/rejected": -5.580976486206055,
|
2550 |
+
"logps/chosen": -0.6702675223350525,
|
2551 |
+
"logps/rejected": -3.7115352153778076,
|
2552 |
+
"loss": 0.7055,
|
2553 |
+
"odds_ratio_loss": 2.6884007453918457,
|
2554 |
+
"rewards/accuracies": 0.8416666388511658,
|
2555 |
+
"rewards/chosen": -0.06702675670385361,
|
2556 |
+
"rewards/margins": 0.3041267395019531,
|
2557 |
+
"rewards/rejected": -0.37115350365638733,
|
2558 |
+
"sft_loss": 0.43668031692504883,
|
2559 |
+
"step": 1500
|
2560 |
+
},
|
2561 |
+
{
|
2562 |
+
"epoch": 0.15905270595877094,
|
2563 |
+
"grad_norm": 6.72556734085083,
|
2564 |
+
"learning_rate": 0.0004947176200630068,
|
2565 |
+
"logits/chosen": -5.502162456512451,
|
2566 |
+
"logits/rejected": -5.502138137817383,
|
2567 |
+
"logps/chosen": -0.6218239068984985,
|
2568 |
+
"logps/rejected": -3.428339719772339,
|
2569 |
+
"loss": 0.6499,
|
2570 |
+
"odds_ratio_loss": 2.3972864151000977,
|
2571 |
+
"rewards/accuracies": 0.8708333373069763,
|
2572 |
+
"rewards/chosen": -0.06218238174915314,
|
2573 |
+
"rewards/margins": 0.28065159916877747,
|
2574 |
+
"rewards/rejected": -0.3428339660167694,
|
2575 |
+
"sft_loss": 0.4101923108100891,
|
2576 |
+
"step": 1510
|
2577 |
+
},
|
2578 |
+
{
|
2579 |
+
"epoch": 0.16010603513730584,
|
2580 |
+
"grad_norm": 3.806243658065796,
|
2581 |
+
"learning_rate": 0.0004945279765762243,
|
2582 |
+
"logits/chosen": -5.590113639831543,
|
2583 |
+
"logits/rejected": -5.590085029602051,
|
2584 |
+
"logps/chosen": -0.6871938109397888,
|
2585 |
+
"logps/rejected": -3.6291747093200684,
|
2586 |
+
"loss": 0.7198,
|
2587 |
+
"odds_ratio_loss": 2.472882032394409,
|
2588 |
+
"rewards/accuracies": 0.8458333611488342,
|
2589 |
+
"rewards/chosen": -0.06871937960386276,
|
2590 |
+
"rewards/margins": 0.2941981554031372,
|
2591 |
+
"rewards/rejected": -0.3629175126552582,
|
2592 |
+
"sft_loss": 0.4725038409233093,
|
2593 |
+
"step": 1520
|
2594 |
+
},
|
2595 |
+
{
|
2596 |
+
"epoch": 0.16115936431584074,
|
2597 |
+
"grad_norm": 5.523288249969482,
|
2598 |
+
"learning_rate": 0.0004943350262965349,
|
2599 |
+
"logits/chosen": -5.691066265106201,
|
2600 |
+
"logits/rejected": -5.6911163330078125,
|
2601 |
+
"logps/chosen": -0.6510820984840393,
|
2602 |
+
"logps/rejected": -3.059138774871826,
|
2603 |
+
"loss": 0.685,
|
2604 |
+
"odds_ratio_loss": 2.56689715385437,
|
2605 |
+
"rewards/accuracies": 0.862500011920929,
|
2606 |
+
"rewards/chosen": -0.06510820984840393,
|
2607 |
+
"rewards/margins": 0.24080567061901093,
|
2608 |
+
"rewards/rejected": -0.30591386556625366,
|
2609 |
+
"sft_loss": 0.42830324172973633,
|
2610 |
+
"step": 1530
|
2611 |
+
},
|
2612 |
+
{
|
2613 |
+
"epoch": 0.16221269349437567,
|
2614 |
+
"grad_norm": 4.537405967712402,
|
2615 |
+
"learning_rate": 0.0004941387718332374,
|
2616 |
+
"logits/chosen": -5.746434688568115,
|
2617 |
+
"logits/rejected": -5.746466159820557,
|
2618 |
+
"logps/chosen": -0.6964403390884399,
|
2619 |
+
"logps/rejected": -3.4062578678131104,
|
2620 |
+
"loss": 0.7309,
|
2621 |
+
"odds_ratio_loss": 2.425229787826538,
|
2622 |
+
"rewards/accuracies": 0.8416666388511658,
|
2623 |
+
"rewards/chosen": -0.0696440264582634,
|
2624 |
+
"rewards/margins": 0.2709817886352539,
|
2625 |
+
"rewards/rejected": -0.3406257629394531,
|
2626 |
+
"sft_loss": 0.4884008467197418,
|
2627 |
+
"step": 1540
|
2628 |
+
},
|
2629 |
+
{
|
2630 |
+
"epoch": 0.16326602267291057,
|
2631 |
+
"grad_norm": 1.987900972366333,
|
2632 |
+
"learning_rate": 0.000493939215840314,
|
2633 |
+
"logits/chosen": -5.694365978240967,
|
2634 |
+
"logits/rejected": -5.694273471832275,
|
2635 |
+
"logps/chosen": -0.6464301347732544,
|
2636 |
+
"logps/rejected": -3.67587947845459,
|
2637 |
+
"loss": 0.6811,
|
2638 |
+
"odds_ratio_loss": 2.441976308822632,
|
2639 |
+
"rewards/accuracies": 0.84375,
|
2640 |
+
"rewards/chosen": -0.0646430179476738,
|
2641 |
+
"rewards/margins": 0.3029448688030243,
|
2642 |
+
"rewards/rejected": -0.36758795380592346,
|
2643 |
+
"sft_loss": 0.43692201375961304,
|
2644 |
+
"step": 1550
|
2645 |
+
},
|
2646 |
+
{
|
2647 |
+
"epoch": 0.16431935185144547,
|
2648 |
+
"grad_norm": 4.507101058959961,
|
2649 |
+
"learning_rate": 0.000493736361016394,
|
2650 |
+
"logits/chosen": -5.841778755187988,
|
2651 |
+
"logits/rejected": -5.841654300689697,
|
2652 |
+
"logps/chosen": -0.6818705797195435,
|
2653 |
+
"logps/rejected": -3.587505578994751,
|
2654 |
+
"loss": 0.7136,
|
2655 |
+
"odds_ratio_loss": 2.5504865646362305,
|
2656 |
+
"rewards/accuracies": 0.8291666507720947,
|
2657 |
+
"rewards/chosen": -0.06818706542253494,
|
2658 |
+
"rewards/margins": 0.29056352376937866,
|
2659 |
+
"rewards/rejected": -0.3587505519390106,
|
2660 |
+
"sft_loss": 0.45857658982276917,
|
2661 |
+
"step": 1560
|
2662 |
+
},
|
2663 |
+
{
|
2664 |
+
"epoch": 0.16537268102998037,
|
2665 |
+
"grad_norm": 3.6050593852996826,
|
2666 |
+
"learning_rate": 0.0004935302101047171,
|
2667 |
+
"logits/chosen": -5.996950149536133,
|
2668 |
+
"logits/rejected": -5.996947288513184,
|
2669 |
+
"logps/chosen": -0.6442388296127319,
|
2670 |
+
"logps/rejected": -3.464613437652588,
|
2671 |
+
"loss": 0.673,
|
2672 |
+
"odds_ratio_loss": 2.08451247215271,
|
2673 |
+
"rewards/accuracies": 0.8520833253860474,
|
2674 |
+
"rewards/chosen": -0.06442389637231827,
|
2675 |
+
"rewards/margins": 0.2820374071598053,
|
2676 |
+
"rewards/rejected": -0.34646129608154297,
|
2677 |
+
"sft_loss": 0.4645636975765228,
|
2678 |
+
"step": 1570
|
2679 |
+
},
|
2680 |
+
{
|
2681 |
+
"epoch": 0.1664260102085153,
|
2682 |
+
"grad_norm": 6.22938871383667,
|
2683 |
+
"learning_rate": 0.0004933207658930968,
|
2684 |
+
"logits/chosen": -6.110846996307373,
|
2685 |
+
"logits/rejected": -6.1108527183532715,
|
2686 |
+
"logps/chosen": -0.5905119776725769,
|
2687 |
+
"logps/rejected": -3.7033638954162598,
|
2688 |
+
"loss": 0.6196,
|
2689 |
+
"odds_ratio_loss": 2.2230594158172607,
|
2690 |
+
"rewards/accuracies": 0.8708333373069763,
|
2691 |
+
"rewards/chosen": -0.05905119329690933,
|
2692 |
+
"rewards/margins": 0.3112851679325104,
|
2693 |
+
"rewards/rejected": -0.3703364133834839,
|
2694 |
+
"sft_loss": 0.3972512185573578,
|
2695 |
+
"step": 1580
|
2696 |
+
},
|
2697 |
+
{
|
2698 |
+
"epoch": 0.1674793393870502,
|
2699 |
+
"grad_norm": 5.692537307739258,
|
2700 |
+
"learning_rate": 0.0004931080312138824,
|
2701 |
+
"logits/chosen": -5.9748077392578125,
|
2702 |
+
"logits/rejected": -5.974870681762695,
|
2703 |
+
"logps/chosen": -0.6414510607719421,
|
2704 |
+
"logps/rejected": -3.1401894092559814,
|
2705 |
+
"loss": 0.6755,
|
2706 |
+
"odds_ratio_loss": 2.408728837966919,
|
2707 |
+
"rewards/accuracies": 0.8416666388511658,
|
2708 |
+
"rewards/chosen": -0.06414511054754257,
|
2709 |
+
"rewards/margins": 0.24987384676933289,
|
2710 |
+
"rewards/rejected": -0.31401893496513367,
|
2711 |
+
"sft_loss": 0.4346589744091034,
|
2712 |
+
"step": 1590
|
2713 |
+
},
|
2714 |
+
{
|
2715 |
+
"epoch": 0.1685326685655851,
|
2716 |
+
"grad_norm": 4.23068380355835,
|
2717 |
+
"learning_rate": 0.0004928920089439206,
|
2718 |
+
"logits/chosen": -5.843720436096191,
|
2719 |
+
"logits/rejected": -5.84379768371582,
|
2720 |
+
"logps/chosen": -0.7081334590911865,
|
2721 |
+
"logps/rejected": -3.097430467605591,
|
2722 |
+
"loss": 0.7425,
|
2723 |
+
"odds_ratio_loss": 2.416752338409424,
|
2724 |
+
"rewards/accuracies": 0.8354166746139526,
|
2725 |
+
"rewards/chosen": -0.07081333547830582,
|
2726 |
+
"rewards/margins": 0.23892968893051147,
|
2727 |
+
"rewards/rejected": -0.3097430169582367,
|
2728 |
+
"sft_loss": 0.5008493661880493,
|
2729 |
+
"step": 1600
|
2730 |
+
},
|
2731 |
+
{
|
2732 |
+
"epoch": 0.16958599774412,
|
2733 |
+
"grad_norm": 7.679098606109619,
|
2734 |
+
"learning_rate": 0.000492672702004517,
|
2735 |
+
"logits/chosen": -5.8228349685668945,
|
2736 |
+
"logits/rejected": -5.822881698608398,
|
2737 |
+
"logps/chosen": -0.6390455365180969,
|
2738 |
+
"logps/rejected": -2.928208112716675,
|
2739 |
+
"loss": 0.6714,
|
2740 |
+
"odds_ratio_loss": 2.5421833992004395,
|
2741 |
+
"rewards/accuracies": 0.8479166626930237,
|
2742 |
+
"rewards/chosen": -0.0639045462012291,
|
2743 |
+
"rewards/margins": 0.2289162576198578,
|
2744 |
+
"rewards/rejected": -0.29282084107398987,
|
2745 |
+
"sft_loss": 0.4171499013900757,
|
2746 |
+
"step": 1610
|
2747 |
+
},
|
2748 |
+
{
|
2749 |
+
"epoch": 0.1706393269226549,
|
2750 |
+
"grad_norm": 2.961296558380127,
|
2751 |
+
"learning_rate": 0.000492450113361396,
|
2752 |
+
"logits/chosen": -5.76025915145874,
|
2753 |
+
"logits/rejected": -5.760366916656494,
|
2754 |
+
"logps/chosen": -0.707727313041687,
|
2755 |
+
"logps/rejected": -2.787848472595215,
|
2756 |
+
"loss": 0.7488,
|
2757 |
+
"odds_ratio_loss": 2.6859564781188965,
|
2758 |
+
"rewards/accuracies": 0.8083333373069763,
|
2759 |
+
"rewards/chosen": -0.07077272981405258,
|
2760 |
+
"rewards/margins": 0.20801211893558502,
|
2761 |
+
"rewards/rejected": -0.278784841299057,
|
2762 |
+
"sft_loss": 0.48024895787239075,
|
2763 |
+
"step": 1620
|
2764 |
+
},
|
2765 |
+
{
|
2766 |
+
"epoch": 0.17169265610118983,
|
2767 |
+
"grad_norm": 5.1038432121276855,
|
2768 |
+
"learning_rate": 0.0004922242460246613,
|
2769 |
+
"logits/chosen": -5.724485397338867,
|
2770 |
+
"logits/rejected": -5.724647521972656,
|
2771 |
+
"logps/chosen": -0.6975029110908508,
|
2772 |
+
"logps/rejected": -2.6335670948028564,
|
2773 |
+
"loss": 0.7302,
|
2774 |
+
"odds_ratio_loss": 2.552466869354248,
|
2775 |
+
"rewards/accuracies": 0.8729166388511658,
|
2776 |
+
"rewards/chosen": -0.06975029408931732,
|
2777 |
+
"rewards/margins": 0.1936063915491104,
|
2778 |
+
"rewards/rejected": -0.26335668563842773,
|
2779 |
+
"sft_loss": 0.4749198257923126,
|
2780 |
+
"step": 1630
|
2781 |
+
},
|
2782 |
+
{
|
2783 |
+
"epoch": 0.17274598527972473,
|
2784 |
+
"grad_norm": 4.588533401489258,
|
2785 |
+
"learning_rate": 0.0004919951030487549,
|
2786 |
+
"logits/chosen": -5.752465724945068,
|
2787 |
+
"logits/rejected": -5.75269079208374,
|
2788 |
+
"logps/chosen": -0.7377220392227173,
|
2789 |
+
"logps/rejected": -2.5051889419555664,
|
2790 |
+
"loss": 0.7742,
|
2791 |
+
"odds_ratio_loss": 2.6720080375671387,
|
2792 |
+
"rewards/accuracies": 0.8354166746139526,
|
2793 |
+
"rewards/chosen": -0.07377220690250397,
|
2794 |
+
"rewards/margins": 0.1767466962337494,
|
2795 |
+
"rewards/rejected": -0.25051891803741455,
|
2796 |
+
"sft_loss": 0.5070357918739319,
|
2797 |
+
"step": 1640
|
2798 |
+
},
|
2799 |
+
{
|
2800 |
+
"epoch": 0.17379931445825963,
|
2801 |
+
"grad_norm": 4.702722072601318,
|
2802 |
+
"learning_rate": 0.0004917626875324156,
|
2803 |
+
"logits/chosen": -5.999307155609131,
|
2804 |
+
"logits/rejected": -5.999597072601318,
|
2805 |
+
"logps/chosen": -0.6588828563690186,
|
2806 |
+
"logps/rejected": -3.105346202850342,
|
2807 |
+
"loss": 0.6907,
|
2808 |
+
"odds_ratio_loss": 2.3203392028808594,
|
2809 |
+
"rewards/accuracies": 0.862500011920929,
|
2810 |
+
"rewards/chosen": -0.06588829308748245,
|
2811 |
+
"rewards/margins": 0.244646355509758,
|
2812 |
+
"rewards/rejected": -0.31053462624549866,
|
2813 |
+
"sft_loss": 0.458629310131073,
|
2814 |
+
"step": 1650
|
2815 |
+
},
|
2816 |
+
{
|
2817 |
+
"epoch": 0.17485264363679454,
|
2818 |
+
"grad_norm": 4.34697961807251,
|
2819 |
+
"learning_rate": 0.0004915270026186377,
|
2820 |
+
"logits/chosen": -6.0448760986328125,
|
2821 |
+
"logits/rejected": -6.045097827911377,
|
2822 |
+
"logps/chosen": -0.6159750819206238,
|
2823 |
+
"logps/rejected": -3.314389705657959,
|
2824 |
+
"loss": 0.6451,
|
2825 |
+
"odds_ratio_loss": 2.2701919078826904,
|
2826 |
+
"rewards/accuracies": 0.862500011920929,
|
2827 |
+
"rewards/chosen": -0.06159750744700432,
|
2828 |
+
"rewards/margins": 0.2698414921760559,
|
2829 |
+
"rewards/rejected": -0.33143898844718933,
|
2830 |
+
"sft_loss": 0.4180779755115509,
|
2831 |
+
"step": 1660
|
2832 |
+
},
|
2833 |
+
{
|
2834 |
+
"epoch": 0.17590597281532946,
|
2835 |
+
"grad_norm": 6.3104119300842285,
|
2836 |
+
"learning_rate": 0.0004912880514946277,
|
2837 |
+
"logits/chosen": -6.198366165161133,
|
2838 |
+
"logits/rejected": -6.198540210723877,
|
2839 |
+
"logps/chosen": -0.6512912511825562,
|
2840 |
+
"logps/rejected": -3.2115368843078613,
|
2841 |
+
"loss": 0.6825,
|
2842 |
+
"odds_ratio_loss": 2.350752353668213,
|
2843 |
+
"rewards/accuracies": 0.8500000238418579,
|
2844 |
+
"rewards/chosen": -0.0651291236281395,
|
2845 |
+
"rewards/margins": 0.256024569272995,
|
2846 |
+
"rewards/rejected": -0.3211536705493927,
|
2847 |
+
"sft_loss": 0.4474564790725708,
|
2848 |
+
"step": 1670
|
2849 |
+
},
|
2850 |
+
{
|
2851 |
+
"epoch": 0.17695930199386437,
|
2852 |
+
"grad_norm": 4.557435989379883,
|
2853 |
+
"learning_rate": 0.0004910458373917618,
|
2854 |
+
"logits/chosen": -5.757941722869873,
|
2855 |
+
"logits/rejected": -5.75801944732666,
|
2856 |
+
"logps/chosen": -0.7286573648452759,
|
2857 |
+
"logps/rejected": -3.038480043411255,
|
2858 |
+
"loss": 0.7631,
|
2859 |
+
"odds_ratio_loss": 2.563901662826538,
|
2860 |
+
"rewards/accuracies": 0.8229166865348816,
|
2861 |
+
"rewards/chosen": -0.07286573201417923,
|
2862 |
+
"rewards/margins": 0.23098230361938477,
|
2863 |
+
"rewards/rejected": -0.303847998380661,
|
2864 |
+
"sft_loss": 0.5067596435546875,
|
2865 |
+
"step": 1680
|
2866 |
+
},
|
2867 |
+
{
|
2868 |
+
"epoch": 0.17801263117239927,
|
2869 |
+
"grad_norm": 4.517858028411865,
|
2870 |
+
"learning_rate": 0.0004908003635855421,
|
2871 |
+
"logits/chosen": -5.7685866355896,
|
2872 |
+
"logits/rejected": -5.768723011016846,
|
2873 |
+
"logps/chosen": -0.6709701418876648,
|
2874 |
+
"logps/rejected": -3.045161724090576,
|
2875 |
+
"loss": 0.7057,
|
2876 |
+
"odds_ratio_loss": 2.353038787841797,
|
2877 |
+
"rewards/accuracies": 0.8270833492279053,
|
2878 |
+
"rewards/chosen": -0.0670970231294632,
|
2879 |
+
"rewards/margins": 0.23741915822029114,
|
2880 |
+
"rewards/rejected": -0.30451616644859314,
|
2881 |
+
"sft_loss": 0.4703839421272278,
|
2882 |
+
"step": 1690
|
2883 |
+
},
|
2884 |
+
{
|
2885 |
+
"epoch": 0.17906596035093417,
|
2886 |
+
"grad_norm": 3.8505797386169434,
|
2887 |
+
"learning_rate": 0.0004905516333955521,
|
2888 |
+
"logits/chosen": -5.820653915405273,
|
2889 |
+
"logits/rejected": -5.820913791656494,
|
2890 |
+
"logps/chosen": -0.6014044880867004,
|
2891 |
+
"logps/rejected": -2.9712061882019043,
|
2892 |
+
"loss": 0.6348,
|
2893 |
+
"odds_ratio_loss": 2.333472728729248,
|
2894 |
+
"rewards/accuracies": 0.84375,
|
2895 |
+
"rewards/chosen": -0.060140449553728104,
|
2896 |
+
"rewards/margins": 0.23698018491268158,
|
2897 |
+
"rewards/rejected": -0.2971206307411194,
|
2898 |
+
"sft_loss": 0.40144431591033936,
|
2899 |
+
"step": 1700
|
2900 |
+
},
|
2901 |
+
{
|
2902 |
+
"epoch": 0.18011928952946907,
|
2903 |
+
"grad_norm": 5.725916385650635,
|
2904 |
+
"learning_rate": 0.0004902996501854119,
|
2905 |
+
"logits/chosen": -6.354620933532715,
|
2906 |
+
"logits/rejected": -6.355036735534668,
|
2907 |
+
"logps/chosen": -1.4475494623184204,
|
2908 |
+
"logps/rejected": -3.9082717895507812,
|
2909 |
+
"loss": 1.4845,
|
2910 |
+
"odds_ratio_loss": 4.034452438354492,
|
2911 |
+
"rewards/accuracies": 0.8291666507720947,
|
2912 |
+
"rewards/chosen": -0.14475493133068085,
|
2913 |
+
"rewards/margins": 0.24607227742671967,
|
2914 |
+
"rewards/rejected": -0.3908271789550781,
|
2915 |
+
"sft_loss": 1.0810879468917847,
|
2916 |
+
"step": 1710
|
2917 |
+
},
|
2918 |
+
{
|
2919 |
+
"epoch": 0.181172618708004,
|
2920 |
+
"grad_norm": 8.74376392364502,
|
2921 |
+
"learning_rate": 0.0004900444173627328,
|
2922 |
+
"logits/chosen": -6.625903129577637,
|
2923 |
+
"logits/rejected": -6.62622594833374,
|
2924 |
+
"logps/chosen": -0.7164724469184875,
|
2925 |
+
"logps/rejected": -3.1040403842926025,
|
2926 |
+
"loss": 0.7538,
|
2927 |
+
"odds_ratio_loss": 2.710744619369507,
|
2928 |
+
"rewards/accuracies": 0.8208333253860474,
|
2929 |
+
"rewards/chosen": -0.07164724916219711,
|
2930 |
+
"rewards/margins": 0.23875676095485687,
|
2931 |
+
"rewards/rejected": -0.3104040026664734,
|
2932 |
+
"sft_loss": 0.4827170670032501,
|
2933 |
+
"step": 1720
|
2934 |
+
},
|
2935 |
+
{
|
2936 |
+
"epoch": 0.1822259478865389,
|
2937 |
+
"grad_norm": 2.9171664714813232,
|
2938 |
+
"learning_rate": 0.0004897859383790711,
|
2939 |
+
"logits/chosen": -6.8558220863342285,
|
2940 |
+
"logits/rejected": -6.856238842010498,
|
2941 |
+
"logps/chosen": -0.6772664785385132,
|
2942 |
+
"logps/rejected": -3.0685346126556396,
|
2943 |
+
"loss": 0.7145,
|
2944 |
+
"odds_ratio_loss": 2.5943028926849365,
|
2945 |
+
"rewards/accuracies": 0.8145833611488342,
|
2946 |
+
"rewards/chosen": -0.06772664934396744,
|
2947 |
+
"rewards/margins": 0.23912683129310608,
|
2948 |
+
"rewards/rejected": -0.3068534731864929,
|
2949 |
+
"sft_loss": 0.4550252854824066,
|
2950 |
+
"step": 1730
|
2951 |
+
},
|
2952 |
+
{
|
2953 |
+
"epoch": 0.1832792770650738,
|
2954 |
+
"grad_norm": 4.223478317260742,
|
2955 |
+
"learning_rate": 0.0004895242167298816,
|
2956 |
+
"logits/chosen": -6.91244649887085,
|
2957 |
+
"logits/rejected": -6.912972927093506,
|
2958 |
+
"logps/chosen": -0.6928088665008545,
|
2959 |
+
"logps/rejected": -3.333386182785034,
|
2960 |
+
"loss": 0.724,
|
2961 |
+
"odds_ratio_loss": 2.613675117492676,
|
2962 |
+
"rewards/accuracies": 0.8291666507720947,
|
2963 |
+
"rewards/chosen": -0.06928088515996933,
|
2964 |
+
"rewards/margins": 0.2640577256679535,
|
2965 |
+
"rewards/rejected": -0.3333386480808258,
|
2966 |
+
"sft_loss": 0.4625937044620514,
|
2967 |
+
"step": 1740
|
2968 |
+
},
|
2969 |
+
{
|
2970 |
+
"epoch": 0.1843326062436087,
|
2971 |
+
"grad_norm": 6.387345790863037,
|
2972 |
+
"learning_rate": 0.0004892592559544702,
|
2973 |
+
"logits/chosen": -6.475886821746826,
|
2974 |
+
"logits/rejected": -6.476265907287598,
|
2975 |
+
"logps/chosen": -0.689757764339447,
|
2976 |
+
"logps/rejected": -2.9596078395843506,
|
2977 |
+
"loss": 0.7284,
|
2978 |
+
"odds_ratio_loss": 2.5536398887634277,
|
2979 |
+
"rewards/accuracies": 0.8083333373069763,
|
2980 |
+
"rewards/chosen": -0.0689757764339447,
|
2981 |
+
"rewards/margins": 0.22698503732681274,
|
2982 |
+
"rewards/rejected": -0.29596078395843506,
|
2983 |
+
"sft_loss": 0.47301238775253296,
|
2984 |
+
"step": 1750
|
2985 |
+
},
|
2986 |
+
{
|
2987 |
+
"epoch": 0.1853859354221436,
|
2988 |
+
"grad_norm": 4.752090930938721,
|
2989 |
+
"learning_rate": 0.0004889910596359457,
|
2990 |
+
"logits/chosen": -6.3866801261901855,
|
2991 |
+
"logits/rejected": -6.3870625495910645,
|
2992 |
+
"logps/chosen": -0.6425169110298157,
|
2993 |
+
"logps/rejected": -3.2402262687683105,
|
2994 |
+
"loss": 0.6768,
|
2995 |
+
"odds_ratio_loss": 2.416498899459839,
|
2996 |
+
"rewards/accuracies": 0.8583333492279053,
|
2997 |
+
"rewards/chosen": -0.06425168365240097,
|
2998 |
+
"rewards/margins": 0.259770929813385,
|
2999 |
+
"rewards/rejected": -0.3240226209163666,
|
3000 |
+
"sft_loss": 0.43517401814460754,
|
3001 |
+
"step": 1760
|
3002 |
+
},
|
3003 |
+
{
|
3004 |
+
"epoch": 0.18643926460067853,
|
3005 |
+
"grad_norm": 4.994302272796631,
|
3006 |
+
"learning_rate": 0.0004887196314011722,
|
3007 |
+
"logits/chosen": -6.208808422088623,
|
3008 |
+
"logits/rejected": -6.209136009216309,
|
3009 |
+
"logps/chosen": -0.6876904964447021,
|
3010 |
+
"logps/rejected": -3.412658929824829,
|
3011 |
+
"loss": 0.7195,
|
3012 |
+
"odds_ratio_loss": 2.441316843032837,
|
3013 |
+
"rewards/accuracies": 0.8479166626930237,
|
3014 |
+
"rewards/chosen": -0.06876904517412186,
|
3015 |
+
"rewards/margins": 0.2724968492984772,
|
3016 |
+
"rewards/rejected": -0.34126585721969604,
|
3017 |
+
"sft_loss": 0.4754055142402649,
|
3018 |
+
"step": 1770
|
3019 |
+
},
|
3020 |
+
{
|
3021 |
+
"epoch": 0.18749259377921343,
|
3022 |
+
"grad_norm": 6.439792156219482,
|
3023 |
+
"learning_rate": 0.0004884449749207192,
|
3024 |
+
"logits/chosen": -6.457438945770264,
|
3025 |
+
"logits/rejected": -6.457731246948242,
|
3026 |
+
"logps/chosen": -0.6336179375648499,
|
3027 |
+
"logps/rejected": -2.840827703475952,
|
3028 |
+
"loss": 0.6657,
|
3029 |
+
"odds_ratio_loss": 2.448340892791748,
|
3030 |
+
"rewards/accuracies": 0.8708333373069763,
|
3031 |
+
"rewards/chosen": -0.06336179375648499,
|
3032 |
+
"rewards/margins": 0.22072099149227142,
|
3033 |
+
"rewards/rejected": -0.2840828001499176,
|
3034 |
+
"sft_loss": 0.420904278755188,
|
3035 |
+
"step": 1780
|
3036 |
+
},
|
3037 |
+
{
|
3038 |
+
"epoch": 0.18854592295774833,
|
3039 |
+
"grad_norm": 4.342998027801514,
|
3040 |
+
"learning_rate": 0.00048816709390881266,
|
3041 |
+
"logits/chosen": -6.21989631652832,
|
3042 |
+
"logits/rejected": -6.220187664031982,
|
3043 |
+
"logps/chosen": -0.6857010722160339,
|
3044 |
+
"logps/rejected": -2.874756336212158,
|
3045 |
+
"loss": 0.7182,
|
3046 |
+
"odds_ratio_loss": 2.453400135040283,
|
3047 |
+
"rewards/accuracies": 0.8520833253860474,
|
3048 |
+
"rewards/chosen": -0.06857011467218399,
|
3049 |
+
"rewards/margins": 0.218905508518219,
|
3050 |
+
"rewards/rejected": -0.2874756455421448,
|
3051 |
+
"sft_loss": 0.47288984060287476,
|
3052 |
+
"step": 1790
|
3053 |
+
},
|
3054 |
+
{
|
3055 |
+
"epoch": 0.18959925213628323,
|
3056 |
+
"grad_norm": 33.773277282714844,
|
3057 |
+
"learning_rate": 0.0004878859921232839,
|
3058 |
+
"logits/chosen": -5.917886257171631,
|
3059 |
+
"logits/rejected": -5.9181623458862305,
|
3060 |
+
"logps/chosen": -0.7129290103912354,
|
3061 |
+
"logps/rejected": -2.9589409828186035,
|
3062 |
+
"loss": 0.7486,
|
3063 |
+
"odds_ratio_loss": 2.4278337955474854,
|
3064 |
+
"rewards/accuracies": 0.8166666626930237,
|
3065 |
+
"rewards/chosen": -0.07129290699958801,
|
3066 |
+
"rewards/margins": 0.22460119426250458,
|
3067 |
+
"rewards/rejected": -0.2958941161632538,
|
3068 |
+
"sft_loss": 0.5058320760726929,
|
3069 |
+
"step": 1800
|
3070 |
+
},
|
3071 |
+
{
|
3072 |
+
"epoch": 0.19065258131481816,
|
3073 |
+
"grad_norm": 4.040477275848389,
|
3074 |
+
"learning_rate": 0.00048760167336551964,
|
3075 |
+
"logits/chosen": -5.841413974761963,
|
3076 |
+
"logits/rejected": -5.8417158126831055,
|
3077 |
+
"logps/chosen": -0.6335561275482178,
|
3078 |
+
"logps/rejected": -3.0441653728485107,
|
3079 |
+
"loss": 0.6684,
|
3080 |
+
"odds_ratio_loss": 2.3195104598999023,
|
3081 |
+
"rewards/accuracies": 0.8354166746139526,
|
3082 |
+
"rewards/chosen": -0.06335561722517014,
|
3083 |
+
"rewards/margins": 0.24106094241142273,
|
3084 |
+
"rewards/rejected": -0.30441656708717346,
|
3085 |
+
"sft_loss": 0.4364630877971649,
|
3086 |
+
"step": 1810
|
3087 |
+
},
|
3088 |
+
{
|
3089 |
+
"epoch": 0.19170591049335306,
|
3090 |
+
"grad_norm": 6.749710559844971,
|
3091 |
+
"learning_rate": 0.0004873141414804103,
|
3092 |
+
"logits/chosen": -5.7162394523620605,
|
3093 |
+
"logits/rejected": -5.716516017913818,
|
3094 |
+
"logps/chosen": -0.6518925428390503,
|
3095 |
+
"logps/rejected": -3.0878660678863525,
|
3096 |
+
"loss": 0.6886,
|
3097 |
+
"odds_ratio_loss": 2.5920615196228027,
|
3098 |
+
"rewards/accuracies": 0.8458333611488342,
|
3099 |
+
"rewards/chosen": -0.06518926471471786,
|
3100 |
+
"rewards/margins": 0.2435973733663559,
|
3101 |
+
"rewards/rejected": -0.30878666043281555,
|
3102 |
+
"sft_loss": 0.4293573498725891,
|
3103 |
+
"step": 1820
|
3104 |
+
},
|
3105 |
+
{
|
3106 |
+
"epoch": 0.19275923967188796,
|
3107 |
+
"grad_norm": 2.045081615447998,
|
3108 |
+
"learning_rate": 0.00048702340035629787,
|
3109 |
+
"logits/chosen": -5.856993198394775,
|
3110 |
+
"logits/rejected": -5.857146263122559,
|
3111 |
+
"logps/chosen": -0.6080865263938904,
|
3112 |
+
"logps/rejected": -2.7589311599731445,
|
3113 |
+
"loss": 0.6369,
|
3114 |
+
"odds_ratio_loss": 2.007796287536621,
|
3115 |
+
"rewards/accuracies": 0.8666666746139526,
|
3116 |
+
"rewards/chosen": -0.06080865487456322,
|
3117 |
+
"rewards/margins": 0.21508444845676422,
|
3118 |
+
"rewards/rejected": -0.27589309215545654,
|
3119 |
+
"sft_loss": 0.43609535694122314,
|
3120 |
+
"step": 1830
|
3121 |
+
},
|
3122 |
+
{
|
3123 |
+
"epoch": 0.19381256885042286,
|
3124 |
+
"grad_norm": 4.158146381378174,
|
3125 |
+
"learning_rate": 0.0004867294539249234,
|
3126 |
+
"logits/chosen": -6.230529308319092,
|
3127 |
+
"logits/rejected": -6.230895519256592,
|
3128 |
+
"logps/chosen": -0.6979438066482544,
|
3129 |
+
"logps/rejected": -3.5587799549102783,
|
3130 |
+
"loss": 0.7295,
|
3131 |
+
"odds_ratio_loss": 2.502671718597412,
|
3132 |
+
"rewards/accuracies": 0.862500011920929,
|
3133 |
+
"rewards/chosen": -0.06979438662528992,
|
3134 |
+
"rewards/margins": 0.2860836088657379,
|
3135 |
+
"rewards/rejected": -0.35587799549102783,
|
3136 |
+
"sft_loss": 0.4792328178882599,
|
3137 |
+
"step": 1840
|
3138 |
+
},
|
3139 |
+
{
|
3140 |
+
"epoch": 0.19486589802895776,
|
3141 |
+
"grad_norm": 2.7599072456359863,
|
3142 |
+
"learning_rate": 0.0004864323061613738,
|
3143 |
+
"logits/chosen": -6.244935512542725,
|
3144 |
+
"logits/rejected": -6.245189189910889,
|
3145 |
+
"logps/chosen": -0.6155544519424438,
|
3146 |
+
"logps/rejected": -3.0617711544036865,
|
3147 |
+
"loss": 0.6473,
|
3148 |
+
"odds_ratio_loss": 2.4084250926971436,
|
3149 |
+
"rewards/accuracies": 0.84375,
|
3150 |
+
"rewards/chosen": -0.06155544891953468,
|
3151 |
+
"rewards/margins": 0.2446216493844986,
|
3152 |
+
"rewards/rejected": -0.3061771094799042,
|
3153 |
+
"sft_loss": 0.4064619243144989,
|
3154 |
+
"step": 1850
|
3155 |
+
},
|
3156 |
+
{
|
3157 |
+
"epoch": 0.1959192272074927,
|
3158 |
+
"grad_norm": 4.056497573852539,
|
3159 |
+
"learning_rate": 0.0004861319610840282,
|
3160 |
+
"logits/chosen": -5.854410648345947,
|
3161 |
+
"logits/rejected": -5.8545074462890625,
|
3162 |
+
"logps/chosen": -0.7075474262237549,
|
3163 |
+
"logps/rejected": -3.4564712047576904,
|
3164 |
+
"loss": 0.7458,
|
3165 |
+
"odds_ratio_loss": 2.5600531101226807,
|
3166 |
+
"rewards/accuracies": 0.8333333134651184,
|
3167 |
+
"rewards/chosen": -0.07075474411249161,
|
3168 |
+
"rewards/margins": 0.2748924195766449,
|
3169 |
+
"rewards/rejected": -0.3456471860408783,
|
3170 |
+
"sft_loss": 0.4897785782814026,
|
3171 |
+
"step": 1860
|
3172 |
+
},
|
3173 |
+
{
|
3174 |
+
"epoch": 0.1969725563860276,
|
3175 |
+
"grad_norm": 7.0494489669799805,
|
3176 |
+
"learning_rate": 0.00048582842275450366,
|
3177 |
+
"logits/chosen": -5.870307922363281,
|
3178 |
+
"logits/rejected": -5.870253086090088,
|
3179 |
+
"logps/chosen": -0.6499666571617126,
|
3180 |
+
"logps/rejected": -3.4302499294281006,
|
3181 |
+
"loss": 0.6847,
|
3182 |
+
"odds_ratio_loss": 2.5706870555877686,
|
3183 |
+
"rewards/accuracies": 0.8520833253860474,
|
3184 |
+
"rewards/chosen": -0.06499668210744858,
|
3185 |
+
"rewards/margins": 0.27802836894989014,
|
3186 |
+
"rewards/rejected": -0.3430250287055969,
|
3187 |
+
"sft_loss": 0.4276408553123474,
|
3188 |
+
"step": 1870
|
3189 |
+
},
|
3190 |
+
{
|
3191 |
+
"epoch": 0.1980258855645625,
|
3192 |
+
"grad_norm": 9.770491600036621,
|
3193 |
+
"learning_rate": 0.0004855216952775999,
|
3194 |
+
"logits/chosen": -6.05530309677124,
|
3195 |
+
"logits/rejected": -6.05518102645874,
|
3196 |
+
"logps/chosen": -0.6710807681083679,
|
3197 |
+
"logps/rejected": -3.7501718997955322,
|
3198 |
+
"loss": 0.7031,
|
3199 |
+
"odds_ratio_loss": 2.5512893199920654,
|
3200 |
+
"rewards/accuracies": 0.8583333492279053,
|
3201 |
+
"rewards/chosen": -0.06710807234048843,
|
3202 |
+
"rewards/margins": 0.3079090714454651,
|
3203 |
+
"rewards/rejected": -0.3750171661376953,
|
3204 |
+
"sft_loss": 0.4479447305202484,
|
3205 |
+
"step": 1880
|
3206 |
+
},
|
3207 |
+
{
|
3208 |
+
"epoch": 0.1990792147430974,
|
3209 |
+
"grad_norm": 46.68679428100586,
|
3210 |
+
"learning_rate": 0.0004852117828012441,
|
3211 |
+
"logits/chosen": -6.125611782073975,
|
3212 |
+
"logits/rejected": -6.125678539276123,
|
3213 |
+
"logps/chosen": -0.8755971789360046,
|
3214 |
+
"logps/rejected": -4.0310163497924805,
|
3215 |
+
"loss": 0.9086,
|
3216 |
+
"odds_ratio_loss": 3.206876516342163,
|
3217 |
+
"rewards/accuracies": 0.8416666388511658,
|
3218 |
+
"rewards/chosen": -0.08755972236394882,
|
3219 |
+
"rewards/margins": 0.31554192304611206,
|
3220 |
+
"rewards/rejected": -0.4031016528606415,
|
3221 |
+
"sft_loss": 0.5878926515579224,
|
3222 |
+
"step": 1890
|
3223 |
+
},
|
3224 |
+
{
|
3225 |
+
"epoch": 0.20013254392163232,
|
3226 |
+
"grad_norm": 7.289094924926758,
|
3227 |
+
"learning_rate": 0.00048489868951643477,
|
3228 |
+
"logits/chosen": -6.526234149932861,
|
3229 |
+
"logits/rejected": -6.526541233062744,
|
3230 |
+
"logps/chosen": -1.0017836093902588,
|
3231 |
+
"logps/rejected": -3.509293556213379,
|
3232 |
+
"loss": 1.0477,
|
3233 |
+
"odds_ratio_loss": 3.5900070667266846,
|
3234 |
+
"rewards/accuracies": 0.8270833492279053,
|
3235 |
+
"rewards/chosen": -0.10017836093902588,
|
3236 |
+
"rewards/margins": 0.2507510483264923,
|
3237 |
+
"rewards/rejected": -0.3509294092655182,
|
3238 |
+
"sft_loss": 0.6887442469596863,
|
3239 |
+
"step": 1900
|
3240 |
}
|
3241 |
],
|
3242 |
"logging_steps": 10,
|
|
|
3256 |
"attributes": {}
|
3257 |
}
|
3258 |
},
|
3259 |
+
"total_flos": 1.4024826693511741e+18,
|
3260 |
"train_batch_size": 2,
|
3261 |
"trial_name": null,
|
3262 |
"trial_params": null
|