JulienRPA commited on
Commit
d615949
1 Parent(s): 0a31185

Training in progress, step 20000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:77974be2f7de212cbabb9208722e3aed56b5b26f32a9ad48edffe1ae63135c5a
3
  size 1987250795
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2a4efaf1d7df688b67a8bcaee1b86e9920cc072ccc38b7a35781e7605583a05
3
  size 1987250795
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:424b76689f2144159866149b5173ff834369dbda461a284c8fbf7bb2fb39c63e
3
  size 996026489
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fac2a8552b10fda2ec260bc4ef57b30166ae633d04a1ea53f8a7688111bbf9a8
3
  size 996026489
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f49c88840fcd3e7484a250d20473a621a4a6be7f193bb562a06eafd55eb4439
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb3b262c5f81955dcb07adc7c487ce014a6181d07adcbde8bbbe4f137c99fced
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:779622c08e70491b7a953485f3890d1c81acb5e7c732a61a899ff0473fd0cfdf
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e8f70b4a42839623f23a2d7f6f16070a0bb7a8546d17d8052ec84cfde1f2b48
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.224066390041494,
5
- "global_step": 15000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -990,11 +990,347 @@
990
  "learning_rate": 2.1064814814814816e-05,
991
  "loss": 0.5598,
992
  "step": 15000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
993
  }
994
  ],
995
  "max_steps": 24100,
996
  "num_train_epochs": 10,
997
- "total_flos": 4202340588086976.0,
998
  "trial_name": null,
999
  "trial_params": null
1000
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 8.298755186721992,
5
+ "global_step": 20000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
990
  "learning_rate": 2.1064814814814816e-05,
991
  "loss": 0.5598,
992
  "step": 15000
993
+ },
994
+ {
995
+ "epoch": 6.27,
996
+ "learning_rate": 2.0833333333333336e-05,
997
+ "loss": 0.5746,
998
+ "step": 15100
999
+ },
1000
+ {
1001
+ "epoch": 6.31,
1002
+ "learning_rate": 2.0601851851851853e-05,
1003
+ "loss": 0.5545,
1004
+ "step": 15200
1005
+ },
1006
+ {
1007
+ "epoch": 6.35,
1008
+ "learning_rate": 2.037037037037037e-05,
1009
+ "loss": 0.5896,
1010
+ "step": 15300
1011
+ },
1012
+ {
1013
+ "epoch": 6.39,
1014
+ "learning_rate": 2.013888888888889e-05,
1015
+ "loss": 0.5367,
1016
+ "step": 15400
1017
+ },
1018
+ {
1019
+ "epoch": 6.43,
1020
+ "learning_rate": 1.990740740740741e-05,
1021
+ "loss": 0.5577,
1022
+ "step": 15500
1023
+ },
1024
+ {
1025
+ "epoch": 6.47,
1026
+ "learning_rate": 1.967592592592593e-05,
1027
+ "loss": 0.5199,
1028
+ "step": 15600
1029
+ },
1030
+ {
1031
+ "epoch": 6.51,
1032
+ "learning_rate": 1.9444444444444445e-05,
1033
+ "loss": 0.5139,
1034
+ "step": 15700
1035
+ },
1036
+ {
1037
+ "epoch": 6.56,
1038
+ "learning_rate": 1.9212962962962962e-05,
1039
+ "loss": 0.5623,
1040
+ "step": 15800
1041
+ },
1042
+ {
1043
+ "epoch": 6.6,
1044
+ "learning_rate": 1.8981481481481482e-05,
1045
+ "loss": 0.5468,
1046
+ "step": 15900
1047
+ },
1048
+ {
1049
+ "epoch": 6.64,
1050
+ "learning_rate": 1.8750000000000002e-05,
1051
+ "loss": 0.5446,
1052
+ "step": 16000
1053
+ },
1054
+ {
1055
+ "epoch": 6.64,
1056
+ "eval_bleu": 75.9224,
1057
+ "eval_em": 0.06,
1058
+ "eval_gen_len": 47.34,
1059
+ "eval_loss": 0.9507510662078857,
1060
+ "eval_rm": 0.14,
1061
+ "eval_runtime": 42.686,
1062
+ "eval_samples_per_second": 1.171,
1063
+ "eval_steps_per_second": 0.164,
1064
+ "step": 16000
1065
+ },
1066
+ {
1067
+ "epoch": 6.68,
1068
+ "learning_rate": 1.8518518518518518e-05,
1069
+ "loss": 0.5024,
1070
+ "step": 16100
1071
+ },
1072
+ {
1073
+ "epoch": 6.72,
1074
+ "learning_rate": 1.8287037037037038e-05,
1075
+ "loss": 0.585,
1076
+ "step": 16200
1077
+ },
1078
+ {
1079
+ "epoch": 6.76,
1080
+ "learning_rate": 1.8055555555555555e-05,
1081
+ "loss": 0.5118,
1082
+ "step": 16300
1083
+ },
1084
+ {
1085
+ "epoch": 6.8,
1086
+ "learning_rate": 1.7824074074074075e-05,
1087
+ "loss": 0.5362,
1088
+ "step": 16400
1089
+ },
1090
+ {
1091
+ "epoch": 6.85,
1092
+ "learning_rate": 1.7592592592592595e-05,
1093
+ "loss": 0.5509,
1094
+ "step": 16500
1095
+ },
1096
+ {
1097
+ "epoch": 6.89,
1098
+ "learning_rate": 1.736111111111111e-05,
1099
+ "loss": 0.5336,
1100
+ "step": 16600
1101
+ },
1102
+ {
1103
+ "epoch": 6.93,
1104
+ "learning_rate": 1.712962962962963e-05,
1105
+ "loss": 0.5307,
1106
+ "step": 16700
1107
+ },
1108
+ {
1109
+ "epoch": 6.97,
1110
+ "learning_rate": 1.6898148148148148e-05,
1111
+ "loss": 0.5012,
1112
+ "step": 16800
1113
+ },
1114
+ {
1115
+ "epoch": 7.01,
1116
+ "learning_rate": 1.6666666666666667e-05,
1117
+ "loss": 0.5114,
1118
+ "step": 16900
1119
+ },
1120
+ {
1121
+ "epoch": 7.05,
1122
+ "learning_rate": 1.6435185185185187e-05,
1123
+ "loss": 0.4098,
1124
+ "step": 17000
1125
+ },
1126
+ {
1127
+ "epoch": 7.1,
1128
+ "learning_rate": 1.6203703703703704e-05,
1129
+ "loss": 0.3945,
1130
+ "step": 17100
1131
+ },
1132
+ {
1133
+ "epoch": 7.14,
1134
+ "learning_rate": 1.597222222222222e-05,
1135
+ "loss": 0.4309,
1136
+ "step": 17200
1137
+ },
1138
+ {
1139
+ "epoch": 7.18,
1140
+ "learning_rate": 1.574074074074074e-05,
1141
+ "loss": 0.446,
1142
+ "step": 17300
1143
+ },
1144
+ {
1145
+ "epoch": 7.22,
1146
+ "learning_rate": 1.550925925925926e-05,
1147
+ "loss": 0.3874,
1148
+ "step": 17400
1149
+ },
1150
+ {
1151
+ "epoch": 7.26,
1152
+ "learning_rate": 1.527777777777778e-05,
1153
+ "loss": 0.4103,
1154
+ "step": 17500
1155
+ },
1156
+ {
1157
+ "epoch": 7.3,
1158
+ "learning_rate": 1.5046296296296297e-05,
1159
+ "loss": 0.4097,
1160
+ "step": 17600
1161
+ },
1162
+ {
1163
+ "epoch": 7.34,
1164
+ "learning_rate": 1.4814814814814815e-05,
1165
+ "loss": 0.3853,
1166
+ "step": 17700
1167
+ },
1168
+ {
1169
+ "epoch": 7.39,
1170
+ "learning_rate": 1.4583333333333335e-05,
1171
+ "loss": 0.3967,
1172
+ "step": 17800
1173
+ },
1174
+ {
1175
+ "epoch": 7.43,
1176
+ "learning_rate": 1.4351851851851853e-05,
1177
+ "loss": 0.4024,
1178
+ "step": 17900
1179
+ },
1180
+ {
1181
+ "epoch": 7.47,
1182
+ "learning_rate": 1.412037037037037e-05,
1183
+ "loss": 0.4362,
1184
+ "step": 18000
1185
+ },
1186
+ {
1187
+ "epoch": 7.47,
1188
+ "eval_bleu": 77.2209,
1189
+ "eval_em": 0.1,
1190
+ "eval_gen_len": 44.36,
1191
+ "eval_loss": 0.8897470235824585,
1192
+ "eval_rm": 0.22,
1193
+ "eval_runtime": 38.4221,
1194
+ "eval_samples_per_second": 1.301,
1195
+ "eval_steps_per_second": 0.182,
1196
+ "step": 18000
1197
+ },
1198
+ {
1199
+ "epoch": 7.51,
1200
+ "learning_rate": 1.388888888888889e-05,
1201
+ "loss": 0.4094,
1202
+ "step": 18100
1203
+ },
1204
+ {
1205
+ "epoch": 7.55,
1206
+ "learning_rate": 1.3657407407407408e-05,
1207
+ "loss": 0.4048,
1208
+ "step": 18200
1209
+ },
1210
+ {
1211
+ "epoch": 7.59,
1212
+ "learning_rate": 1.3425925925925928e-05,
1213
+ "loss": 0.4047,
1214
+ "step": 18300
1215
+ },
1216
+ {
1217
+ "epoch": 7.63,
1218
+ "learning_rate": 1.3194444444444446e-05,
1219
+ "loss": 0.4203,
1220
+ "step": 18400
1221
+ },
1222
+ {
1223
+ "epoch": 7.68,
1224
+ "learning_rate": 1.2962962962962962e-05,
1225
+ "loss": 0.4176,
1226
+ "step": 18500
1227
+ },
1228
+ {
1229
+ "epoch": 7.72,
1230
+ "learning_rate": 1.2731481481481482e-05,
1231
+ "loss": 0.4351,
1232
+ "step": 18600
1233
+ },
1234
+ {
1235
+ "epoch": 7.76,
1236
+ "learning_rate": 1.25e-05,
1237
+ "loss": 0.3931,
1238
+ "step": 18700
1239
+ },
1240
+ {
1241
+ "epoch": 7.8,
1242
+ "learning_rate": 1.2268518518518519e-05,
1243
+ "loss": 0.3883,
1244
+ "step": 18800
1245
+ },
1246
+ {
1247
+ "epoch": 7.84,
1248
+ "learning_rate": 1.2037037037037037e-05,
1249
+ "loss": 0.3879,
1250
+ "step": 18900
1251
+ },
1252
+ {
1253
+ "epoch": 7.88,
1254
+ "learning_rate": 1.1805555555555555e-05,
1255
+ "loss": 0.3854,
1256
+ "step": 19000
1257
+ },
1258
+ {
1259
+ "epoch": 7.93,
1260
+ "learning_rate": 1.1574074074074075e-05,
1261
+ "loss": 0.3694,
1262
+ "step": 19100
1263
+ },
1264
+ {
1265
+ "epoch": 7.97,
1266
+ "learning_rate": 1.1342592592592593e-05,
1267
+ "loss": 0.4217,
1268
+ "step": 19200
1269
+ },
1270
+ {
1271
+ "epoch": 8.01,
1272
+ "learning_rate": 1.1111111111111112e-05,
1273
+ "loss": 0.3859,
1274
+ "step": 19300
1275
+ },
1276
+ {
1277
+ "epoch": 8.05,
1278
+ "learning_rate": 1.087962962962963e-05,
1279
+ "loss": 0.2946,
1280
+ "step": 19400
1281
+ },
1282
+ {
1283
+ "epoch": 8.09,
1284
+ "learning_rate": 1.0648148148148148e-05,
1285
+ "loss": 0.2993,
1286
+ "step": 19500
1287
+ },
1288
+ {
1289
+ "epoch": 8.13,
1290
+ "learning_rate": 1.0416666666666668e-05,
1291
+ "loss": 0.3085,
1292
+ "step": 19600
1293
+ },
1294
+ {
1295
+ "epoch": 8.17,
1296
+ "learning_rate": 1.0185185185185185e-05,
1297
+ "loss": 0.3064,
1298
+ "step": 19700
1299
+ },
1300
+ {
1301
+ "epoch": 8.22,
1302
+ "learning_rate": 9.953703703703704e-06,
1303
+ "loss": 0.2933,
1304
+ "step": 19800
1305
+ },
1306
+ {
1307
+ "epoch": 8.26,
1308
+ "learning_rate": 9.722222222222223e-06,
1309
+ "loss": 0.3256,
1310
+ "step": 19900
1311
+ },
1312
+ {
1313
+ "epoch": 8.3,
1314
+ "learning_rate": 9.490740740740741e-06,
1315
+ "loss": 0.3231,
1316
+ "step": 20000
1317
+ },
1318
+ {
1319
+ "epoch": 8.3,
1320
+ "eval_bleu": 78.5526,
1321
+ "eval_em": 0.16,
1322
+ "eval_gen_len": 46.66,
1323
+ "eval_loss": 0.923231840133667,
1324
+ "eval_rm": 0.26,
1325
+ "eval_runtime": 38.1607,
1326
+ "eval_samples_per_second": 1.31,
1327
+ "eval_steps_per_second": 0.183,
1328
+ "step": 20000
1329
  }
1330
  ],
1331
  "max_steps": 24100,
1332
  "num_train_epochs": 10,
1333
+ "total_flos": 5595722553109824.0,
1334
  "trial_name": null,
1335
  "trial_params": null
1336
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:424b76689f2144159866149b5173ff834369dbda461a284c8fbf7bb2fb39c63e
3
  size 996026489
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fac2a8552b10fda2ec260bc4ef57b30166ae633d04a1ea53f8a7688111bbf9a8
3
  size 996026489
runs/May23_15-22-34_9fa8c32f042d/events.out.tfevents.1684855595.9fa8c32f042d.2702.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94f6fffc24ea17e68f3d4f5e198e1b2c8ea8005ae01df56b4238a9b453c896c5
3
- size 35326
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11eaca0cd6ded2e6a60b26453da92cdba53b13c25024fd83859555acf5cc1f31
3
+ size 44691