AmberYifan commited on
Commit
070b435
1 Parent(s): 8d50dc9

Training in progress, step 744, checkpoint

Browse files
last-checkpoint/global_step744/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15df7b8feb64c0def21d6686e6896228795ba5c064d737fb7b4db615c6e74d16
3
+ size 14483467880
last-checkpoint/global_step744/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:735b6b88c0d927e094a0b91d0c7f7aed29f52fa2dc27100157290c05d191bdde
3
+ size 14483467880
last-checkpoint/global_step744/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f14712eceb262cfece0c425c5004dbe8abe0b47d188b38373f16f6f6eb94bd83
3
+ size 14483467880
last-checkpoint/global_step744/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02d33d0778de46e61f1ccb04f2f2d1ac5899f5db4d2cc5d77f4977b4831e32ea
3
+ size 14483467880
last-checkpoint/global_step744/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3a5fb8629f5851e476393962efcdc3d1b4877c56a2d137b5477b351ccf0f284
3
+ size 150629
last-checkpoint/global_step744/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f6426baf25e93384fe89562a7d8b5683eb537d5425d57eadcc57234162445e8
3
+ size 150629
last-checkpoint/global_step744/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5546fc1e0e833ffb17055a7df4bbc63059537920e8f146bac42431b5b6de206b
3
+ size 150629
last-checkpoint/global_step744/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8b79e376c5d97e922123f499a588e3a870b240e7791c121277324477e2ab056
3
+ size 150629
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step620
 
1
+ global_step744
last-checkpoint/model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93daaec3380eb160ed1b2b55d7e6a40b3148f5806a93acda8285c13c6ed11ec4
3
  size 4943162336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9eddc78da78db963ff6a4e64d2e60d892b8dac3e70f615cabb7ceae72ca03b62
3
  size 4943162336
last-checkpoint/model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d99f0e63f5427ff1eb28a5e26e8a6cb5f108907e9ec53956cf11addbcacf1c34
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:940e5b0d00daf8819b94e6e57a1299366ddf8e1a00b6cd3d1da725c5a54e992b
3
  size 4999819336
last-checkpoint/model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6dd3351df120c66885127c540da0c2667724cb06d47ddfb327a851ef7dace34c
3
  size 4540516344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:543b4a9c2a0c0d8ec0093719d952db485ab8f03e97e5ec0572a12b30748d847c
3
  size 4540516344
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b3ea85ce0faf4f25f8f01506659575c6ffa73d2668e6dcf972bea416c2f14cf7
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbba064fc12f687aed38027284f03d6f81ae6faf0de84e9180dbf16683476efa
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6df02d244f55294fc3a1bed06a850d7dc043249b3e97a2a1f72408d7e378d44b
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87dbadfb5a748f92f595fe38ec8cb595ec8a8e1ed626289bab87357f6453fdcc
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dbafb0c52cec706b84eb0e477953ab4950ffc8e931e49bbc7a37cd7d5d929a69
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0c80d8a165b5c7da7326858ba39715fa818bb86784e4927c06f952990826c90
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:007e6b920b3c7dfd19dd00d30bc01c0cd1682b25270dc31952d9b737d039f1d1
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9188fea8871f54026651c146ed6d61c8ba187418003301d02d1d867f08d5f341
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44a438c4653759e5a86be993ca4420444015d4e70859f1f853dd1fc989248fda
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:090ca277b83e149ceb78f8ec9d7b16b90c4d719022166461c1f9f0014114ec3d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.984,
5
  "eval_steps": 62,
6
- "global_step": 620,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1112,6 +1112,218 @@
1112
  "eval_samples_per_second": 5.465,
1113
  "eval_steps_per_second": 0.355,
1114
  "step": 620
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1115
  }
1116
  ],
1117
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.3808,
5
  "eval_steps": 62,
6
+ "global_step": 744,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1112
  "eval_samples_per_second": 5.465,
1113
  "eval_steps_per_second": 0.355,
1114
  "step": 620
1115
+ },
1116
+ {
1117
+ "epoch": 2.016,
1118
+ "grad_norm": 20.538104998428924,
1119
+ "learning_rate": 1.8171021377672207e-07,
1120
+ "logits/generated": -2.5056118965148926,
1121
+ "logits/real": -2.5445072650909424,
1122
+ "logps/generated": -125.99369812011719,
1123
+ "logps/real": -116.36385345458984,
1124
+ "loss": 0.3077,
1125
+ "rewards/accuracies": 0.925000011920929,
1126
+ "rewards/generated": 0.36697912216186523,
1127
+ "rewards/margins": 3.193668842315674,
1128
+ "rewards/real": 3.560647964477539,
1129
+ "step": 630
1130
+ },
1131
+ {
1132
+ "epoch": 2.048,
1133
+ "grad_norm": 26.08905065246513,
1134
+ "learning_rate": 1.7577197149643706e-07,
1135
+ "logits/generated": -2.5060019493103027,
1136
+ "logits/real": -2.5419182777404785,
1137
+ "logps/generated": -120.49100494384766,
1138
+ "logps/real": -89.79083251953125,
1139
+ "loss": 0.1648,
1140
+ "rewards/accuracies": 0.9750000238418579,
1141
+ "rewards/generated": -0.3632515072822571,
1142
+ "rewards/margins": 3.968273639678955,
1143
+ "rewards/real": 3.6050219535827637,
1144
+ "step": 640
1145
+ },
1146
+ {
1147
+ "epoch": 2.08,
1148
+ "grad_norm": 23.70246936190852,
1149
+ "learning_rate": 1.6983372921615202e-07,
1150
+ "logits/generated": -2.3460607528686523,
1151
+ "logits/real": -2.47868013381958,
1152
+ "logps/generated": -118.75785827636719,
1153
+ "logps/real": -105.28114318847656,
1154
+ "loss": 0.2231,
1155
+ "rewards/accuracies": 0.9750000238418579,
1156
+ "rewards/generated": -0.6100242137908936,
1157
+ "rewards/margins": 4.250189781188965,
1158
+ "rewards/real": 3.640165328979492,
1159
+ "step": 650
1160
+ },
1161
+ {
1162
+ "epoch": 2.112,
1163
+ "grad_norm": 52.82052578333479,
1164
+ "learning_rate": 1.6389548693586697e-07,
1165
+ "logits/generated": -2.4621224403381348,
1166
+ "logits/real": -2.535007953643799,
1167
+ "logps/generated": -125.203125,
1168
+ "logps/real": -101.14630126953125,
1169
+ "loss": 0.2196,
1170
+ "rewards/accuracies": 0.9624999761581421,
1171
+ "rewards/generated": -0.19764788448810577,
1172
+ "rewards/margins": 4.143462181091309,
1173
+ "rewards/real": 3.945814847946167,
1174
+ "step": 660
1175
+ },
1176
+ {
1177
+ "epoch": 2.144,
1178
+ "grad_norm": 24.059115327582756,
1179
+ "learning_rate": 1.5795724465558193e-07,
1180
+ "logits/generated": -2.5252292156219482,
1181
+ "logits/real": -2.523108720779419,
1182
+ "logps/generated": -147.6621856689453,
1183
+ "logps/real": -110.50947570800781,
1184
+ "loss": 0.2099,
1185
+ "rewards/accuracies": 0.9624999761581421,
1186
+ "rewards/generated": -0.5420292019844055,
1187
+ "rewards/margins": 4.283000946044922,
1188
+ "rewards/real": 3.7409720420837402,
1189
+ "step": 670
1190
+ },
1191
+ {
1192
+ "epoch": 2.176,
1193
+ "grad_norm": 31.606417791931438,
1194
+ "learning_rate": 1.520190023752969e-07,
1195
+ "logits/generated": -2.319981098175049,
1196
+ "logits/real": -2.3249740600585938,
1197
+ "logps/generated": -115.99867248535156,
1198
+ "logps/real": -91.61642456054688,
1199
+ "loss": 0.226,
1200
+ "rewards/accuracies": 0.9375,
1201
+ "rewards/generated": -0.14518947899341583,
1202
+ "rewards/margins": 3.8571395874023438,
1203
+ "rewards/real": 3.7119498252868652,
1204
+ "step": 680
1205
+ },
1206
+ {
1207
+ "epoch": 2.1824,
1208
+ "eval_logits/generated": -2.3994381427764893,
1209
+ "eval_logits/real": -2.4211130142211914,
1210
+ "eval_logps/generated": -106.77970886230469,
1211
+ "eval_logps/real": -116.72004699707031,
1212
+ "eval_loss": 0.7430130839347839,
1213
+ "eval_rewards/accuracies": 0.6346153616905212,
1214
+ "eval_rewards/generated": 1.4535826444625854,
1215
+ "eval_rewards/margins": 0.765827476978302,
1216
+ "eval_rewards/real": 2.2194101810455322,
1217
+ "eval_runtime": 37.5985,
1218
+ "eval_samples_per_second": 5.319,
1219
+ "eval_steps_per_second": 0.346,
1220
+ "step": 682
1221
+ },
1222
+ {
1223
+ "epoch": 2.208,
1224
+ "grad_norm": 42.94228027224244,
1225
+ "learning_rate": 1.4608076009501184e-07,
1226
+ "logits/generated": -2.391913414001465,
1227
+ "logits/real": -2.399176597595215,
1228
+ "logps/generated": -138.39405822753906,
1229
+ "logps/real": -105.54881286621094,
1230
+ "loss": 0.3128,
1231
+ "rewards/accuracies": 0.925000011920929,
1232
+ "rewards/generated": -0.44563937187194824,
1233
+ "rewards/margins": 3.831275224685669,
1234
+ "rewards/real": 3.3856358528137207,
1235
+ "step": 690
1236
+ },
1237
+ {
1238
+ "epoch": 2.24,
1239
+ "grad_norm": 21.835007321618892,
1240
+ "learning_rate": 1.4014251781472683e-07,
1241
+ "logits/generated": -2.358987331390381,
1242
+ "logits/real": -2.485370635986328,
1243
+ "logps/generated": -131.94631958007812,
1244
+ "logps/real": -101.49398803710938,
1245
+ "loss": 0.2099,
1246
+ "rewards/accuracies": 1.0,
1247
+ "rewards/generated": -0.540885329246521,
1248
+ "rewards/margins": 4.241226673126221,
1249
+ "rewards/real": 3.700340986251831,
1250
+ "step": 700
1251
+ },
1252
+ {
1253
+ "epoch": 2.2720000000000002,
1254
+ "grad_norm": 34.153099948226355,
1255
+ "learning_rate": 1.342042755344418e-07,
1256
+ "logits/generated": -2.4148426055908203,
1257
+ "logits/real": -2.470059394836426,
1258
+ "logps/generated": -109.29563903808594,
1259
+ "logps/real": -102.6874008178711,
1260
+ "loss": 0.2117,
1261
+ "rewards/accuracies": 0.9624999761581421,
1262
+ "rewards/generated": -0.2213163673877716,
1263
+ "rewards/margins": 3.902988910675049,
1264
+ "rewards/real": 3.6816723346710205,
1265
+ "step": 710
1266
+ },
1267
+ {
1268
+ "epoch": 2.304,
1269
+ "grad_norm": 19.318708757059966,
1270
+ "learning_rate": 1.2826603325415677e-07,
1271
+ "logits/generated": -2.3977880477905273,
1272
+ "logits/real": -2.435490369796753,
1273
+ "logps/generated": -136.2637939453125,
1274
+ "logps/real": -103.02522277832031,
1275
+ "loss": 0.1954,
1276
+ "rewards/accuracies": 0.9624999761581421,
1277
+ "rewards/generated": -0.8926679491996765,
1278
+ "rewards/margins": 4.538367748260498,
1279
+ "rewards/real": 3.645700454711914,
1280
+ "step": 720
1281
+ },
1282
+ {
1283
+ "epoch": 2.336,
1284
+ "grad_norm": 45.22290167254912,
1285
+ "learning_rate": 1.2232779097387173e-07,
1286
+ "logits/generated": -2.398287296295166,
1287
+ "logits/real": -2.4827542304992676,
1288
+ "logps/generated": -131.3472900390625,
1289
+ "logps/real": -111.76424407958984,
1290
+ "loss": 0.1815,
1291
+ "rewards/accuracies": 0.9750000238418579,
1292
+ "rewards/generated": -0.5653759241104126,
1293
+ "rewards/margins": 4.332846641540527,
1294
+ "rewards/real": 3.7674708366394043,
1295
+ "step": 730
1296
+ },
1297
+ {
1298
+ "epoch": 2.368,
1299
+ "grad_norm": 13.254233051180913,
1300
+ "learning_rate": 1.163895486935867e-07,
1301
+ "logits/generated": -2.4649178981781006,
1302
+ "logits/real": -2.532130002975464,
1303
+ "logps/generated": -152.01437377929688,
1304
+ "logps/real": -120.23062896728516,
1305
+ "loss": 0.2117,
1306
+ "rewards/accuracies": 0.9624999761581421,
1307
+ "rewards/generated": -0.7008813619613647,
1308
+ "rewards/margins": 4.570525169372559,
1309
+ "rewards/real": 3.8696446418762207,
1310
+ "step": 740
1311
+ },
1312
+ {
1313
+ "epoch": 2.3808,
1314
+ "eval_logits/generated": -2.4077060222625732,
1315
+ "eval_logits/real": -2.4527196884155273,
1316
+ "eval_logps/generated": -107.33969116210938,
1317
+ "eval_logps/real": -117.47949981689453,
1318
+ "eval_loss": 0.7449278235435486,
1319
+ "eval_rewards/accuracies": 0.5961538553237915,
1320
+ "eval_rewards/generated": 1.397584319114685,
1321
+ "eval_rewards/margins": 0.7458791732788086,
1322
+ "eval_rewards/real": 2.143463611602783,
1323
+ "eval_runtime": 36.1012,
1324
+ "eval_samples_per_second": 5.54,
1325
+ "eval_steps_per_second": 0.36,
1326
+ "step": 744
1327
  }
1328
  ],
1329
  "logging_steps": 10,