JulienRPA commited on
Commit
6192378
1 Parent(s): f92bdc3

Training in progress, step 20000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e88223f691161e3ffd4564e693f314c328a9dda83c6b6102570c77aacceb5b7
3
  size 1987250795
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27f4d0eddea1b38fe1b064b634ee31b71591cebd4a1e1ea45cc654a255e6f2a6
3
  size 1987250795
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa9b0c8d9009664c4300b90d08bc7562ffcb54fad5cddd58a67988a9614811c9
3
  size 996026489
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a0d0aa4eebf05151380698116acb15efc7e866ed933da42068ae8c22cfa382e
3
  size 996026489
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4859bbb7f1b84791b7031a22e16d3ec69d1279cc060d7fd0e413b65e6970cdf9
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46c405f6c1e391e2b416a111eb36de129465177d85a3f632dda3a1eb030336c8
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:779622c08e70491b7a953485f3890d1c81acb5e7c732a61a899ff0473fd0cfdf
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e8f70b4a42839623f23a2d7f6f16070a0bb7a8546d17d8052ec84cfde1f2b48
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.224066390041494,
5
- "global_step": 15000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -990,11 +990,347 @@
990
  "learning_rate": 2.1064814814814816e-05,
991
  "loss": 0.5798,
992
  "step": 15000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
993
  }
994
  ],
995
  "max_steps": 24100,
996
  "num_train_epochs": 10,
997
- "total_flos": 4374137384877888.0,
998
  "trial_name": null,
999
  "trial_params": null
1000
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 8.298755186721992,
5
+ "global_step": 20000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
990
  "learning_rate": 2.1064814814814816e-05,
991
  "loss": 0.5798,
992
  "step": 15000
993
+ },
994
+ {
995
+ "epoch": 6.27,
996
+ "learning_rate": 2.0833333333333336e-05,
997
+ "loss": 0.5939,
998
+ "step": 15100
999
+ },
1000
+ {
1001
+ "epoch": 6.31,
1002
+ "learning_rate": 2.0601851851851853e-05,
1003
+ "loss": 0.6095,
1004
+ "step": 15200
1005
+ },
1006
+ {
1007
+ "epoch": 6.35,
1008
+ "learning_rate": 2.037037037037037e-05,
1009
+ "loss": 0.6163,
1010
+ "step": 15300
1011
+ },
1012
+ {
1013
+ "epoch": 6.39,
1014
+ "learning_rate": 2.013888888888889e-05,
1015
+ "loss": 0.5634,
1016
+ "step": 15400
1017
+ },
1018
+ {
1019
+ "epoch": 6.43,
1020
+ "learning_rate": 1.990740740740741e-05,
1021
+ "loss": 0.5909,
1022
+ "step": 15500
1023
+ },
1024
+ {
1025
+ "epoch": 6.47,
1026
+ "learning_rate": 1.967592592592593e-05,
1027
+ "loss": 0.5408,
1028
+ "step": 15600
1029
+ },
1030
+ {
1031
+ "epoch": 6.51,
1032
+ "learning_rate": 1.9444444444444445e-05,
1033
+ "loss": 0.5435,
1034
+ "step": 15700
1035
+ },
1036
+ {
1037
+ "epoch": 6.56,
1038
+ "learning_rate": 1.9212962962962962e-05,
1039
+ "loss": 0.5753,
1040
+ "step": 15800
1041
+ },
1042
+ {
1043
+ "epoch": 6.6,
1044
+ "learning_rate": 1.8981481481481482e-05,
1045
+ "loss": 0.5491,
1046
+ "step": 15900
1047
+ },
1048
+ {
1049
+ "epoch": 6.64,
1050
+ "learning_rate": 1.8750000000000002e-05,
1051
+ "loss": 0.5868,
1052
+ "step": 16000
1053
+ },
1054
+ {
1055
+ "epoch": 6.64,
1056
+ "eval_bleu": 71.6124,
1057
+ "eval_em": 0.02,
1058
+ "eval_gen_len": 47.52,
1059
+ "eval_loss": 0.9307076930999756,
1060
+ "eval_rm": 0.9556,
1061
+ "eval_runtime": 330.299,
1062
+ "eval_samples_per_second": 0.151,
1063
+ "eval_steps_per_second": 0.021,
1064
+ "step": 16000
1065
+ },
1066
+ {
1067
+ "epoch": 6.68,
1068
+ "learning_rate": 1.8518518518518518e-05,
1069
+ "loss": 0.5088,
1070
+ "step": 16100
1071
+ },
1072
+ {
1073
+ "epoch": 6.72,
1074
+ "learning_rate": 1.8287037037037038e-05,
1075
+ "loss": 0.6068,
1076
+ "step": 16200
1077
+ },
1078
+ {
1079
+ "epoch": 6.76,
1080
+ "learning_rate": 1.8055555555555555e-05,
1081
+ "loss": 0.5327,
1082
+ "step": 16300
1083
+ },
1084
+ {
1085
+ "epoch": 6.8,
1086
+ "learning_rate": 1.7824074074074075e-05,
1087
+ "loss": 0.5759,
1088
+ "step": 16400
1089
+ },
1090
+ {
1091
+ "epoch": 6.85,
1092
+ "learning_rate": 1.7592592592592595e-05,
1093
+ "loss": 0.5849,
1094
+ "step": 16500
1095
+ },
1096
+ {
1097
+ "epoch": 6.89,
1098
+ "learning_rate": 1.736111111111111e-05,
1099
+ "loss": 0.5648,
1100
+ "step": 16600
1101
+ },
1102
+ {
1103
+ "epoch": 6.93,
1104
+ "learning_rate": 1.712962962962963e-05,
1105
+ "loss": 0.5632,
1106
+ "step": 16700
1107
+ },
1108
+ {
1109
+ "epoch": 6.97,
1110
+ "learning_rate": 1.6898148148148148e-05,
1111
+ "loss": 0.5093,
1112
+ "step": 16800
1113
+ },
1114
+ {
1115
+ "epoch": 7.01,
1116
+ "learning_rate": 1.6666666666666667e-05,
1117
+ "loss": 0.5295,
1118
+ "step": 16900
1119
+ },
1120
+ {
1121
+ "epoch": 7.05,
1122
+ "learning_rate": 1.6435185185185187e-05,
1123
+ "loss": 0.433,
1124
+ "step": 17000
1125
+ },
1126
+ {
1127
+ "epoch": 7.1,
1128
+ "learning_rate": 1.6203703703703704e-05,
1129
+ "loss": 0.4289,
1130
+ "step": 17100
1131
+ },
1132
+ {
1133
+ "epoch": 7.14,
1134
+ "learning_rate": 1.597222222222222e-05,
1135
+ "loss": 0.4619,
1136
+ "step": 17200
1137
+ },
1138
+ {
1139
+ "epoch": 7.18,
1140
+ "learning_rate": 1.574074074074074e-05,
1141
+ "loss": 0.458,
1142
+ "step": 17300
1143
+ },
1144
+ {
1145
+ "epoch": 7.22,
1146
+ "learning_rate": 1.550925925925926e-05,
1147
+ "loss": 0.4155,
1148
+ "step": 17400
1149
+ },
1150
+ {
1151
+ "epoch": 7.26,
1152
+ "learning_rate": 1.527777777777778e-05,
1153
+ "loss": 0.4391,
1154
+ "step": 17500
1155
+ },
1156
+ {
1157
+ "epoch": 7.3,
1158
+ "learning_rate": 1.5046296296296297e-05,
1159
+ "loss": 0.4264,
1160
+ "step": 17600
1161
+ },
1162
+ {
1163
+ "epoch": 7.34,
1164
+ "learning_rate": 1.4814814814814815e-05,
1165
+ "loss": 0.418,
1166
+ "step": 17700
1167
+ },
1168
+ {
1169
+ "epoch": 7.39,
1170
+ "learning_rate": 1.4583333333333335e-05,
1171
+ "loss": 0.4285,
1172
+ "step": 17800
1173
+ },
1174
+ {
1175
+ "epoch": 7.43,
1176
+ "learning_rate": 1.4351851851851853e-05,
1177
+ "loss": 0.4466,
1178
+ "step": 17900
1179
+ },
1180
+ {
1181
+ "epoch": 7.47,
1182
+ "learning_rate": 1.412037037037037e-05,
1183
+ "loss": 0.4499,
1184
+ "step": 18000
1185
+ },
1186
+ {
1187
+ "epoch": 7.47,
1188
+ "eval_bleu": 77.237,
1189
+ "eval_em": 0.06,
1190
+ "eval_gen_len": 46.0,
1191
+ "eval_loss": 0.8866045475006104,
1192
+ "eval_rm": 0.9574,
1193
+ "eval_runtime": 280.5418,
1194
+ "eval_samples_per_second": 0.178,
1195
+ "eval_steps_per_second": 0.025,
1196
+ "step": 18000
1197
+ },
1198
+ {
1199
+ "epoch": 7.51,
1200
+ "learning_rate": 1.388888888888889e-05,
1201
+ "loss": 0.4415,
1202
+ "step": 18100
1203
+ },
1204
+ {
1205
+ "epoch": 7.55,
1206
+ "learning_rate": 1.3657407407407408e-05,
1207
+ "loss": 0.4209,
1208
+ "step": 18200
1209
+ },
1210
+ {
1211
+ "epoch": 7.59,
1212
+ "learning_rate": 1.3425925925925928e-05,
1213
+ "loss": 0.4357,
1214
+ "step": 18300
1215
+ },
1216
+ {
1217
+ "epoch": 7.63,
1218
+ "learning_rate": 1.3194444444444446e-05,
1219
+ "loss": 0.437,
1220
+ "step": 18400
1221
+ },
1222
+ {
1223
+ "epoch": 7.68,
1224
+ "learning_rate": 1.2962962962962962e-05,
1225
+ "loss": 0.4319,
1226
+ "step": 18500
1227
+ },
1228
+ {
1229
+ "epoch": 7.72,
1230
+ "learning_rate": 1.2731481481481482e-05,
1231
+ "loss": 0.4578,
1232
+ "step": 18600
1233
+ },
1234
+ {
1235
+ "epoch": 7.76,
1236
+ "learning_rate": 1.25e-05,
1237
+ "loss": 0.4244,
1238
+ "step": 18700
1239
+ },
1240
+ {
1241
+ "epoch": 7.8,
1242
+ "learning_rate": 1.2268518518518519e-05,
1243
+ "loss": 0.4046,
1244
+ "step": 18800
1245
+ },
1246
+ {
1247
+ "epoch": 7.84,
1248
+ "learning_rate": 1.2037037037037037e-05,
1249
+ "loss": 0.411,
1250
+ "step": 18900
1251
+ },
1252
+ {
1253
+ "epoch": 7.88,
1254
+ "learning_rate": 1.1805555555555555e-05,
1255
+ "loss": 0.4219,
1256
+ "step": 19000
1257
+ },
1258
+ {
1259
+ "epoch": 7.93,
1260
+ "learning_rate": 1.1574074074074075e-05,
1261
+ "loss": 0.3956,
1262
+ "step": 19100
1263
+ },
1264
+ {
1265
+ "epoch": 7.97,
1266
+ "learning_rate": 1.1342592592592593e-05,
1267
+ "loss": 0.4333,
1268
+ "step": 19200
1269
+ },
1270
+ {
1271
+ "epoch": 8.01,
1272
+ "learning_rate": 1.1111111111111112e-05,
1273
+ "loss": 0.4141,
1274
+ "step": 19300
1275
+ },
1276
+ {
1277
+ "epoch": 8.05,
1278
+ "learning_rate": 1.087962962962963e-05,
1279
+ "loss": 0.3199,
1280
+ "step": 19400
1281
+ },
1282
+ {
1283
+ "epoch": 8.09,
1284
+ "learning_rate": 1.0648148148148148e-05,
1285
+ "loss": 0.3236,
1286
+ "step": 19500
1287
+ },
1288
+ {
1289
+ "epoch": 8.13,
1290
+ "learning_rate": 1.0416666666666668e-05,
1291
+ "loss": 0.3405,
1292
+ "step": 19600
1293
+ },
1294
+ {
1295
+ "epoch": 8.17,
1296
+ "learning_rate": 1.0185185185185185e-05,
1297
+ "loss": 0.3411,
1298
+ "step": 19700
1299
+ },
1300
+ {
1301
+ "epoch": 8.22,
1302
+ "learning_rate": 9.953703703703704e-06,
1303
+ "loss": 0.3166,
1304
+ "step": 19800
1305
+ },
1306
+ {
1307
+ "epoch": 8.26,
1308
+ "learning_rate": 9.722222222222223e-06,
1309
+ "loss": 0.3463,
1310
+ "step": 19900
1311
+ },
1312
+ {
1313
+ "epoch": 8.3,
1314
+ "learning_rate": 9.490740740740741e-06,
1315
+ "loss": 0.3515,
1316
+ "step": 20000
1317
+ },
1318
+ {
1319
+ "epoch": 8.3,
1320
+ "eval_bleu": 77.5798,
1321
+ "eval_em": 0.08,
1322
+ "eval_gen_len": 47.5,
1323
+ "eval_loss": 0.9069581627845764,
1324
+ "eval_rm": 0.9574,
1325
+ "eval_runtime": 312.8421,
1326
+ "eval_samples_per_second": 0.16,
1327
+ "eval_steps_per_second": 0.022,
1328
+ "step": 20000
1329
  }
1330
  ],
1331
  "max_steps": 24100,
1332
  "num_train_epochs": 10,
1333
+ "total_flos": 5827422495665664.0,
1334
  "trial_name": null,
1335
  "trial_params": null
1336
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1b9f4030acc1b98cd8fc39d52688ca9a4f11fa176fbaa30e9b52baf417f88e0
3
- size 4155
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f39883603e0f5d5f423c83fa5b774cffc6fd43f8970ddb1f1665c98b8cf652f9
3
+ size 4219
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa9b0c8d9009664c4300b90d08bc7562ffcb54fad5cddd58a67988a9614811c9
3
  size 996026489
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a0d0aa4eebf05151380698116acb15efc7e866ed933da42068ae8c22cfa382e
3
  size 996026489
runs/May23_11-27-41_4b659dec4dc1/1684841942.6076107/events.out.tfevents.1684841942.4b659dec4dc1.6139.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:552ac6b3d8683237063758cc1cf667f0e68175fbe9bb74e25dc0cdc033aa36e5
3
+ size 6342
runs/May23_11-27-41_4b659dec4dc1/events.out.tfevents.1684841942.4b659dec4dc1.6139.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46bed9901348db72ec0ddf87172765591157e7e6f99f5dd9c06ca85bd762ce33
3
+ size 17952
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1b9f4030acc1b98cd8fc39d52688ca9a4f11fa176fbaa30e9b52baf417f88e0
3
- size 4155
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f39883603e0f5d5f423c83fa5b774cffc6fd43f8970ddb1f1665c98b8cf652f9
3
+ size 4219