AmberYifan commited on
Commit
9c40dae
·
verified ·
1 Parent(s): 7d5eb33

Training in progress, epoch 3, checkpoint

Browse files
last-checkpoint/global_step936/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba23f67105dfdf2606c4117e349b8b3411572f1a06f7274b93df98e0143a2340
3
+ size 15231238316
last-checkpoint/global_step936/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7dad3d1ee2361272c98613600e6f8429d26a7b8d1074ac9377d72ac245455869
3
+ size 15231238316
last-checkpoint/global_step936/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4a6d7e737ce568b4e386577d72d1f92716a80be5f246cfd3aa1556290a60eed
3
+ size 15231238316
last-checkpoint/global_step936/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4f229e47f657f28ec28e62aaeca029699998ba7522ae8c22350737dc7025fc9
3
+ size 15231238316
last-checkpoint/global_step936/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a4e58bbf7a35296b99917a789d4b78e20da4a4bf2a8c214a2e5a32093d923a8
3
+ size 167957
last-checkpoint/global_step936/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c75545d2cf19fd330fc12c711c4743c238fae2d43b92fd3cc2dc97a854b1edcf
3
+ size 167957
last-checkpoint/global_step936/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd58531347d43a63a43eb6be7ca653248617eb89702bb4332abb01d1374cf643
3
+ size 167957
last-checkpoint/global_step936/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0c6399b2ff9579e7d03117018ec8e18fcb11197d75bc16ca790017943fc7e6e
3
+ size 167957
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step624
 
1
+ global_step936
last-checkpoint/model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0dcd3e4a895b73381d5d935ddd5929761c94d2e1528411a4dbef227b6bc5a2bb
3
  size 4877660776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b480061d20c9dde9569efa10be72a732fef72237acaa351113526613d1effe7
3
  size 4877660776
last-checkpoint/model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf2f0cb81392172f2ca379ba4cd18b73ff55a7f1e263d8e1d13b571c3ae0496d
3
  size 4932751008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e03818263419c0eb073727a956e1d356f84beed0b0f403009ddbb0c4ba85c10
3
  size 4932751008
last-checkpoint/model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c75756c367ad34e736c60b0f08af15606111abb54903816ab10581f170dd1ea
3
  size 4330865200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9b9e1687de6db5715c623975316bab142f0b4400c82827d5a0ba40a036cb290
3
  size 4330865200
last-checkpoint/model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e35448a8a2334c73339c2cb7cec36694aade444feb087ec1989b3195df69de5c
3
  size 1089994880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dec9edd546f3223e9bdef16c620819d972a426e63309d450e681b77ed1a0be35
3
  size 1089994880
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2307c03867cef25b5028feb9a23f80e784b9af9a615de13ddca560a6a90fb593
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab7cf0d34d60dfda516cf9661904550e2e294e723edd07c25c738f05e8ba92d1
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50218cfaecdd818354e567b7167c13899e3b42297e7d8f58bd7e732cfa547800
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7b183324e8227a51a9556d86b2ad893a8c4c52205ed4a737356c6611dac7353
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9814a66b49861f5495b06dae3be12ddf7185b88e2cae1fb808ca9efd99d5807f
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac69e994090f4818cb1fa6f6cefa363178552c3c731c6507ff195bcb07fd5bef
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7be93040a65e0a29975f6c70b94418e1fdf88423a50c58aa572141d3c92fbfc
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f68e61b63402f8afb1f69c960f7944965655dac11e3ccf29919c282f23931f86
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eba46a04fca9d4584d0e7af07b2d004fba441dbf422bdb9977bfac0706fb41fe
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b68e0a486bddf6dff90d73426f2281b66faaad35da219979b7985f12d4a9a424
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.0,
5
  "eval_steps": 500,
6
- "global_step": 624,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -984,6 +984,487 @@
984
  "eval_samples_per_second": 15.148,
985
  "eval_steps_per_second": 0.533,
986
  "step": 624
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
987
  }
988
  ],
989
  "logging_steps": 10,
@@ -998,7 +1479,7 @@
998
  "should_evaluate": false,
999
  "should_log": false,
1000
  "should_save": true,
1001
- "should_training_stop": false
1002
  },
1003
  "attributes": {}
1004
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.0,
5
  "eval_steps": 500,
6
+ "global_step": 936,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
984
  "eval_samples_per_second": 15.148,
985
  "eval_steps_per_second": 0.533,
986
  "step": 624
987
+ },
988
+ {
989
+ "epoch": 2.019230769230769,
990
+ "grad_norm": 7.602302274735335,
991
+ "learning_rate": 1.8171021377672207e-07,
992
+ "logits/chosen": -2.078125,
993
+ "logits/rejected": -1.890625,
994
+ "logps/chosen": -158.0,
995
+ "logps/rejected": -249.0,
996
+ "loss": 0.0824,
997
+ "rewards/accuracies": 0.949999988079071,
998
+ "rewards/chosen": -0.703125,
999
+ "rewards/margins": 4.9375,
1000
+ "rewards/rejected": -5.65625,
1001
+ "step": 630
1002
+ },
1003
+ {
1004
+ "epoch": 2.051282051282051,
1005
+ "grad_norm": 5.740339271228209,
1006
+ "learning_rate": 1.7577197149643706e-07,
1007
+ "logits/chosen": -2.171875,
1008
+ "logits/rejected": -1.84375,
1009
+ "logps/chosen": -127.5,
1010
+ "logps/rejected": -204.0,
1011
+ "loss": 0.073,
1012
+ "rewards/accuracies": 0.987500011920929,
1013
+ "rewards/chosen": -0.44140625,
1014
+ "rewards/margins": 4.875,
1015
+ "rewards/rejected": -5.3125,
1016
+ "step": 640
1017
+ },
1018
+ {
1019
+ "epoch": 2.0833333333333335,
1020
+ "grad_norm": 7.905725533448705,
1021
+ "learning_rate": 1.6983372921615202e-07,
1022
+ "logits/chosen": -2.171875,
1023
+ "logits/rejected": -1.953125,
1024
+ "logps/chosen": -134.0,
1025
+ "logps/rejected": -218.0,
1026
+ "loss": 0.0712,
1027
+ "rewards/accuracies": 0.987500011920929,
1028
+ "rewards/chosen": -0.88671875,
1029
+ "rewards/margins": 4.84375,
1030
+ "rewards/rejected": -5.71875,
1031
+ "step": 650
1032
+ },
1033
+ {
1034
+ "epoch": 2.1153846153846154,
1035
+ "grad_norm": 8.114397917313823,
1036
+ "learning_rate": 1.6389548693586697e-07,
1037
+ "logits/chosen": -2.15625,
1038
+ "logits/rejected": -1.890625,
1039
+ "logps/chosen": -152.0,
1040
+ "logps/rejected": -248.0,
1041
+ "loss": 0.0518,
1042
+ "rewards/accuracies": 0.987500011920929,
1043
+ "rewards/chosen": -0.486328125,
1044
+ "rewards/margins": 5.65625,
1045
+ "rewards/rejected": -6.15625,
1046
+ "step": 660
1047
+ },
1048
+ {
1049
+ "epoch": 2.1474358974358974,
1050
+ "grad_norm": 8.087418605885372,
1051
+ "learning_rate": 1.5795724465558193e-07,
1052
+ "logits/chosen": -2.078125,
1053
+ "logits/rejected": -1.890625,
1054
+ "logps/chosen": -180.0,
1055
+ "logps/rejected": -246.0,
1056
+ "loss": 0.0685,
1057
+ "rewards/accuracies": 1.0,
1058
+ "rewards/chosen": -1.0234375,
1059
+ "rewards/margins": 5.3125,
1060
+ "rewards/rejected": -6.34375,
1061
+ "step": 670
1062
+ },
1063
+ {
1064
+ "epoch": 2.1794871794871793,
1065
+ "grad_norm": 4.110432168859028,
1066
+ "learning_rate": 1.520190023752969e-07,
1067
+ "logits/chosen": -2.03125,
1068
+ "logits/rejected": -1.8828125,
1069
+ "logps/chosen": -147.0,
1070
+ "logps/rejected": -240.0,
1071
+ "loss": 0.0593,
1072
+ "rewards/accuracies": 0.987500011920929,
1073
+ "rewards/chosen": -0.828125,
1074
+ "rewards/margins": 5.0625,
1075
+ "rewards/rejected": -5.875,
1076
+ "step": 680
1077
+ },
1078
+ {
1079
+ "epoch": 2.2115384615384617,
1080
+ "grad_norm": 3.9500688995557565,
1081
+ "learning_rate": 1.4608076009501184e-07,
1082
+ "logits/chosen": -2.15625,
1083
+ "logits/rejected": -1.8828125,
1084
+ "logps/chosen": -149.0,
1085
+ "logps/rejected": -212.0,
1086
+ "loss": 0.0679,
1087
+ "rewards/accuracies": 0.9624999761581421,
1088
+ "rewards/chosen": -0.7421875,
1089
+ "rewards/margins": 5.0625,
1090
+ "rewards/rejected": -5.8125,
1091
+ "step": 690
1092
+ },
1093
+ {
1094
+ "epoch": 2.2435897435897436,
1095
+ "grad_norm": 3.869815076968618,
1096
+ "learning_rate": 1.4014251781472683e-07,
1097
+ "logits/chosen": -2.1875,
1098
+ "logits/rejected": -1.890625,
1099
+ "logps/chosen": -142.0,
1100
+ "logps/rejected": -237.0,
1101
+ "loss": 0.0566,
1102
+ "rewards/accuracies": 1.0,
1103
+ "rewards/chosen": -0.72265625,
1104
+ "rewards/margins": 5.9375,
1105
+ "rewards/rejected": -6.6875,
1106
+ "step": 700
1107
+ },
1108
+ {
1109
+ "epoch": 2.2756410256410255,
1110
+ "grad_norm": 6.361045590274929,
1111
+ "learning_rate": 1.342042755344418e-07,
1112
+ "logits/chosen": -2.21875,
1113
+ "logits/rejected": -1.984375,
1114
+ "logps/chosen": -127.5,
1115
+ "logps/rejected": -236.0,
1116
+ "loss": 0.0471,
1117
+ "rewards/accuracies": 1.0,
1118
+ "rewards/chosen": -0.7421875,
1119
+ "rewards/margins": 5.90625,
1120
+ "rewards/rejected": -6.65625,
1121
+ "step": 710
1122
+ },
1123
+ {
1124
+ "epoch": 2.3076923076923075,
1125
+ "grad_norm": 14.662277411395577,
1126
+ "learning_rate": 1.2826603325415677e-07,
1127
+ "logits/chosen": -2.234375,
1128
+ "logits/rejected": -1.9609375,
1129
+ "logps/chosen": -149.0,
1130
+ "logps/rejected": -241.0,
1131
+ "loss": 0.06,
1132
+ "rewards/accuracies": 0.9750000238418579,
1133
+ "rewards/chosen": -0.86328125,
1134
+ "rewards/margins": 5.6875,
1135
+ "rewards/rejected": -6.53125,
1136
+ "step": 720
1137
+ },
1138
+ {
1139
+ "epoch": 2.33974358974359,
1140
+ "grad_norm": 9.563371630730597,
1141
+ "learning_rate": 1.2232779097387173e-07,
1142
+ "logits/chosen": -2.265625,
1143
+ "logits/rejected": -2.015625,
1144
+ "logps/chosen": -159.0,
1145
+ "logps/rejected": -266.0,
1146
+ "loss": 0.0506,
1147
+ "rewards/accuracies": 0.9750000238418579,
1148
+ "rewards/chosen": -0.60546875,
1149
+ "rewards/margins": 6.25,
1150
+ "rewards/rejected": -6.84375,
1151
+ "step": 730
1152
+ },
1153
+ {
1154
+ "epoch": 2.371794871794872,
1155
+ "grad_norm": 10.814008192833413,
1156
+ "learning_rate": 1.163895486935867e-07,
1157
+ "logits/chosen": -2.109375,
1158
+ "logits/rejected": -1.8671875,
1159
+ "logps/chosen": -132.0,
1160
+ "logps/rejected": -241.0,
1161
+ "loss": 0.0465,
1162
+ "rewards/accuracies": 0.987500011920929,
1163
+ "rewards/chosen": -0.6875,
1164
+ "rewards/margins": 5.8125,
1165
+ "rewards/rejected": -6.46875,
1166
+ "step": 740
1167
+ },
1168
+ {
1169
+ "epoch": 2.4038461538461537,
1170
+ "grad_norm": 3.97366591840873,
1171
+ "learning_rate": 1.1045130641330165e-07,
1172
+ "logits/chosen": -2.140625,
1173
+ "logits/rejected": -1.828125,
1174
+ "logps/chosen": -148.0,
1175
+ "logps/rejected": -211.0,
1176
+ "loss": 0.0436,
1177
+ "rewards/accuracies": 0.987500011920929,
1178
+ "rewards/chosen": -1.109375,
1179
+ "rewards/margins": 4.6875,
1180
+ "rewards/rejected": -5.78125,
1181
+ "step": 750
1182
+ },
1183
+ {
1184
+ "epoch": 2.435897435897436,
1185
+ "grad_norm": 8.630922662380843,
1186
+ "learning_rate": 1.0451306413301662e-07,
1187
+ "logits/chosen": -2.171875,
1188
+ "logits/rejected": -1.9296875,
1189
+ "logps/chosen": -143.0,
1190
+ "logps/rejected": -252.0,
1191
+ "loss": 0.06,
1192
+ "rewards/accuracies": 1.0,
1193
+ "rewards/chosen": -1.03125,
1194
+ "rewards/margins": 5.75,
1195
+ "rewards/rejected": -6.78125,
1196
+ "step": 760
1197
+ },
1198
+ {
1199
+ "epoch": 2.467948717948718,
1200
+ "grad_norm": 12.297647342928055,
1201
+ "learning_rate": 9.857482185273158e-08,
1202
+ "logits/chosen": -2.25,
1203
+ "logits/rejected": -1.984375,
1204
+ "logps/chosen": -141.0,
1205
+ "logps/rejected": -237.0,
1206
+ "loss": 0.0778,
1207
+ "rewards/accuracies": 0.9750000238418579,
1208
+ "rewards/chosen": -0.87890625,
1209
+ "rewards/margins": 5.53125,
1210
+ "rewards/rejected": -6.40625,
1211
+ "step": 770
1212
+ },
1213
+ {
1214
+ "epoch": 2.5,
1215
+ "grad_norm": 6.128703056349062,
1216
+ "learning_rate": 9.263657957244655e-08,
1217
+ "logits/chosen": -2.125,
1218
+ "logits/rejected": -1.921875,
1219
+ "logps/chosen": -156.0,
1220
+ "logps/rejected": -272.0,
1221
+ "loss": 0.0513,
1222
+ "rewards/accuracies": 0.987500011920929,
1223
+ "rewards/chosen": -1.1015625,
1224
+ "rewards/margins": 5.5,
1225
+ "rewards/rejected": -6.59375,
1226
+ "step": 780
1227
+ },
1228
+ {
1229
+ "epoch": 2.532051282051282,
1230
+ "grad_norm": 6.1462118726242165,
1231
+ "learning_rate": 8.669833729216151e-08,
1232
+ "logits/chosen": -2.09375,
1233
+ "logits/rejected": -1.9296875,
1234
+ "logps/chosen": -155.0,
1235
+ "logps/rejected": -219.0,
1236
+ "loss": 0.0717,
1237
+ "rewards/accuracies": 0.9624999761581421,
1238
+ "rewards/chosen": -1.4609375,
1239
+ "rewards/margins": 4.71875,
1240
+ "rewards/rejected": -6.1875,
1241
+ "step": 790
1242
+ },
1243
+ {
1244
+ "epoch": 2.564102564102564,
1245
+ "grad_norm": 2.477324464565163,
1246
+ "learning_rate": 8.076009501187649e-08,
1247
+ "logits/chosen": -2.109375,
1248
+ "logits/rejected": -1.9609375,
1249
+ "logps/chosen": -151.0,
1250
+ "logps/rejected": -276.0,
1251
+ "loss": 0.0596,
1252
+ "rewards/accuracies": 0.9624999761581421,
1253
+ "rewards/chosen": -0.53515625,
1254
+ "rewards/margins": 6.125,
1255
+ "rewards/rejected": -6.625,
1256
+ "step": 800
1257
+ },
1258
+ {
1259
+ "epoch": 2.5961538461538463,
1260
+ "grad_norm": 9.889607398096697,
1261
+ "learning_rate": 7.482185273159145e-08,
1262
+ "logits/chosen": -2.171875,
1263
+ "logits/rejected": -1.9375,
1264
+ "logps/chosen": -141.0,
1265
+ "logps/rejected": -260.0,
1266
+ "loss": 0.0714,
1267
+ "rewards/accuracies": 0.987500011920929,
1268
+ "rewards/chosen": -0.92578125,
1269
+ "rewards/margins": 6.0,
1270
+ "rewards/rejected": -6.9375,
1271
+ "step": 810
1272
+ },
1273
+ {
1274
+ "epoch": 2.628205128205128,
1275
+ "grad_norm": 13.491047889005468,
1276
+ "learning_rate": 6.88836104513064e-08,
1277
+ "logits/chosen": -2.140625,
1278
+ "logits/rejected": -1.8515625,
1279
+ "logps/chosen": -137.0,
1280
+ "logps/rejected": -217.0,
1281
+ "loss": 0.0532,
1282
+ "rewards/accuracies": 0.9750000238418579,
1283
+ "rewards/chosen": -1.1796875,
1284
+ "rewards/margins": 5.28125,
1285
+ "rewards/rejected": -6.46875,
1286
+ "step": 820
1287
+ },
1288
+ {
1289
+ "epoch": 2.66025641025641,
1290
+ "grad_norm": 12.675796793182576,
1291
+ "learning_rate": 6.294536817102138e-08,
1292
+ "logits/chosen": -2.15625,
1293
+ "logits/rejected": -1.921875,
1294
+ "logps/chosen": -148.0,
1295
+ "logps/rejected": -231.0,
1296
+ "loss": 0.0521,
1297
+ "rewards/accuracies": 1.0,
1298
+ "rewards/chosen": -1.359375,
1299
+ "rewards/margins": 5.53125,
1300
+ "rewards/rejected": -6.875,
1301
+ "step": 830
1302
+ },
1303
+ {
1304
+ "epoch": 2.6923076923076925,
1305
+ "grad_norm": 6.986806620253292,
1306
+ "learning_rate": 5.700712589073634e-08,
1307
+ "logits/chosen": -2.3125,
1308
+ "logits/rejected": -2.0,
1309
+ "logps/chosen": -132.0,
1310
+ "logps/rejected": -254.0,
1311
+ "loss": 0.0581,
1312
+ "rewards/accuracies": 0.9750000238418579,
1313
+ "rewards/chosen": -0.91015625,
1314
+ "rewards/margins": 6.34375,
1315
+ "rewards/rejected": -7.25,
1316
+ "step": 840
1317
+ },
1318
+ {
1319
+ "epoch": 2.7243589743589745,
1320
+ "grad_norm": 6.0495293737323825,
1321
+ "learning_rate": 5.10688836104513e-08,
1322
+ "logits/chosen": -2.28125,
1323
+ "logits/rejected": -1.90625,
1324
+ "logps/chosen": -134.0,
1325
+ "logps/rejected": -219.0,
1326
+ "loss": 0.0463,
1327
+ "rewards/accuracies": 0.9624999761581421,
1328
+ "rewards/chosen": -1.3359375,
1329
+ "rewards/margins": 5.8125,
1330
+ "rewards/rejected": -7.125,
1331
+ "step": 850
1332
+ },
1333
+ {
1334
+ "epoch": 2.7564102564102564,
1335
+ "grad_norm": 6.627498798256024,
1336
+ "learning_rate": 4.5130641330166267e-08,
1337
+ "logits/chosen": -2.125,
1338
+ "logits/rejected": -1.9921875,
1339
+ "logps/chosen": -166.0,
1340
+ "logps/rejected": -255.0,
1341
+ "loss": 0.0433,
1342
+ "rewards/accuracies": 0.9624999761581421,
1343
+ "rewards/chosen": -1.328125,
1344
+ "rewards/margins": 5.65625,
1345
+ "rewards/rejected": -7.0,
1346
+ "step": 860
1347
+ },
1348
+ {
1349
+ "epoch": 2.7884615384615383,
1350
+ "grad_norm": 15.46615808127758,
1351
+ "learning_rate": 3.919239904988123e-08,
1352
+ "logits/chosen": -2.140625,
1353
+ "logits/rejected": -1.921875,
1354
+ "logps/chosen": -137.0,
1355
+ "logps/rejected": -227.0,
1356
+ "loss": 0.0493,
1357
+ "rewards/accuracies": 0.987500011920929,
1358
+ "rewards/chosen": -1.625,
1359
+ "rewards/margins": 5.625,
1360
+ "rewards/rejected": -7.25,
1361
+ "step": 870
1362
+ },
1363
+ {
1364
+ "epoch": 2.8205128205128203,
1365
+ "grad_norm": 8.779331452142452,
1366
+ "learning_rate": 3.32541567695962e-08,
1367
+ "logits/chosen": -2.21875,
1368
+ "logits/rejected": -1.953125,
1369
+ "logps/chosen": -145.0,
1370
+ "logps/rejected": -246.0,
1371
+ "loss": 0.0534,
1372
+ "rewards/accuracies": 1.0,
1373
+ "rewards/chosen": -1.453125,
1374
+ "rewards/margins": 6.09375,
1375
+ "rewards/rejected": -7.5625,
1376
+ "step": 880
1377
+ },
1378
+ {
1379
+ "epoch": 2.8525641025641026,
1380
+ "grad_norm": 4.870041095527679,
1381
+ "learning_rate": 2.7315914489311164e-08,
1382
+ "logits/chosen": -2.109375,
1383
+ "logits/rejected": -1.859375,
1384
+ "logps/chosen": -151.0,
1385
+ "logps/rejected": -230.0,
1386
+ "loss": 0.076,
1387
+ "rewards/accuracies": 0.949999988079071,
1388
+ "rewards/chosen": -1.6796875,
1389
+ "rewards/margins": 5.1875,
1390
+ "rewards/rejected": -6.875,
1391
+ "step": 890
1392
+ },
1393
+ {
1394
+ "epoch": 2.8846153846153846,
1395
+ "grad_norm": 16.406143452936824,
1396
+ "learning_rate": 2.1377672209026125e-08,
1397
+ "logits/chosen": -2.109375,
1398
+ "logits/rejected": -1.8984375,
1399
+ "logps/chosen": -158.0,
1400
+ "logps/rejected": -243.0,
1401
+ "loss": 0.0549,
1402
+ "rewards/accuracies": 0.9624999761581421,
1403
+ "rewards/chosen": -1.4140625,
1404
+ "rewards/margins": 5.21875,
1405
+ "rewards/rejected": -6.65625,
1406
+ "step": 900
1407
+ },
1408
+ {
1409
+ "epoch": 2.9166666666666665,
1410
+ "grad_norm": 12.234122662756715,
1411
+ "learning_rate": 1.5439429928741092e-08,
1412
+ "logits/chosen": -2.234375,
1413
+ "logits/rejected": -2.015625,
1414
+ "logps/chosen": -134.0,
1415
+ "logps/rejected": -249.0,
1416
+ "loss": 0.0666,
1417
+ "rewards/accuracies": 0.949999988079071,
1418
+ "rewards/chosen": -0.8046875,
1419
+ "rewards/margins": 6.09375,
1420
+ "rewards/rejected": -6.875,
1421
+ "step": 910
1422
+ },
1423
+ {
1424
+ "epoch": 2.948717948717949,
1425
+ "grad_norm": 6.961798221064861,
1426
+ "learning_rate": 9.501187648456057e-09,
1427
+ "logits/chosen": -2.171875,
1428
+ "logits/rejected": -1.9296875,
1429
+ "logps/chosen": -145.0,
1430
+ "logps/rejected": -250.0,
1431
+ "loss": 0.063,
1432
+ "rewards/accuracies": 0.9750000238418579,
1433
+ "rewards/chosen": -1.515625,
1434
+ "rewards/margins": 5.375,
1435
+ "rewards/rejected": -6.90625,
1436
+ "step": 920
1437
+ },
1438
+ {
1439
+ "epoch": 2.980769230769231,
1440
+ "grad_norm": 12.001510983290025,
1441
+ "learning_rate": 3.562945368171021e-09,
1442
+ "logits/chosen": -2.25,
1443
+ "logits/rejected": -1.9765625,
1444
+ "logps/chosen": -154.0,
1445
+ "logps/rejected": -272.0,
1446
+ "loss": 0.0594,
1447
+ "rewards/accuracies": 0.9750000238418579,
1448
+ "rewards/chosen": -1.546875,
1449
+ "rewards/margins": 5.875,
1450
+ "rewards/rejected": -7.40625,
1451
+ "step": 930
1452
+ },
1453
+ {
1454
+ "epoch": 3.0,
1455
+ "eval_logits/chosen": -1.9609375,
1456
+ "eval_logits/rejected": -1.8125,
1457
+ "eval_logps/chosen": -193.0,
1458
+ "eval_logps/rejected": -210.0,
1459
+ "eval_loss": 0.6552096605300903,
1460
+ "eval_rewards/accuracies": 0.6785714030265808,
1461
+ "eval_rewards/chosen": -3.390625,
1462
+ "eval_rewards/margins": 2.109375,
1463
+ "eval_rewards/rejected": -5.5,
1464
+ "eval_runtime": 14.6293,
1465
+ "eval_samples_per_second": 13.603,
1466
+ "eval_steps_per_second": 0.478,
1467
+ "step": 936
1468
  }
1469
  ],
1470
  "logging_steps": 10,
 
1479
  "should_evaluate": false,
1480
  "should_log": false,
1481
  "should_save": true,
1482
+ "should_training_stop": true
1483
  },
1484
  "attributes": {}
1485
  }