Federic committed on
Commit
bcdd46a
1 Parent(s): 3b62efa

Training in progress, step 225, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:75eca5dddc386a889c6eadd70a8f95b34b18eeca352934088d95cb81557f417c
3
  size 1822093912
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:674226647c0af0371804ab2d68e3d707974b6551b87819cd9b51eaf76d96623d
3
  size 1822093912
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:faf94306fbcd9fbb459530b7f3721ba13ee9ded04cbefa8a326a59a76016d9ba
3
  size 651839518
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed5cf37ce34fe8faf97f2f26392f8fe21ffa3a7dfbb31c731986c8abfcd99633
3
  size 651839518
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df6f0f8239ffefed30e603cf30841bbf0c2fcff79585c1498fc94f8f96b5ea45
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:421f99707ecd0d99d7fb40197f028ceb02c2ffdc30698df74ad0b16e6900f2cf
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:859ff0676471245c9481ca25d6d6778d1c7963c39b7877af46bb8ca30a9ead21
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f59a103009f3230e51c40288ef6a33247523fa398934878b1e22a81660cbade8
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.6,
5
  "eval_steps": 500,
6
- "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1207,13 +1207,163 @@
1207
  "learning_rate": 0.0002,
1208
  "loss": 0.4379,
1209
  "step": 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1210
  }
1211
  ],
1212
  "logging_steps": 1,
1213
  "max_steps": 250,
1214
  "num_train_epochs": 2,
1215
  "save_steps": 25,
1216
- "total_flos": 2.592463382721331e+16,
1217
  "trial_name": null,
1218
  "trial_params": null
1219
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.8,
5
  "eval_steps": 500,
6
+ "global_step": 225,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1207
  "learning_rate": 0.0002,
1208
  "loss": 0.4379,
1209
  "step": 200
1210
+ },
1211
+ {
1212
+ "epoch": 1.61,
1213
+ "learning_rate": 0.0002,
1214
+ "loss": 0.412,
1215
+ "step": 201
1216
+ },
1217
+ {
1218
+ "epoch": 1.62,
1219
+ "learning_rate": 0.0002,
1220
+ "loss": 0.4059,
1221
+ "step": 202
1222
+ },
1223
+ {
1224
+ "epoch": 1.62,
1225
+ "learning_rate": 0.0002,
1226
+ "loss": 0.4189,
1227
+ "step": 203
1228
+ },
1229
+ {
1230
+ "epoch": 1.63,
1231
+ "learning_rate": 0.0002,
1232
+ "loss": 0.4127,
1233
+ "step": 204
1234
+ },
1235
+ {
1236
+ "epoch": 1.64,
1237
+ "learning_rate": 0.0002,
1238
+ "loss": 0.4131,
1239
+ "step": 205
1240
+ },
1241
+ {
1242
+ "epoch": 1.65,
1243
+ "learning_rate": 0.0002,
1244
+ "loss": 0.3867,
1245
+ "step": 206
1246
+ },
1247
+ {
1248
+ "epoch": 1.66,
1249
+ "learning_rate": 0.0002,
1250
+ "loss": 0.3402,
1251
+ "step": 207
1252
+ },
1253
+ {
1254
+ "epoch": 1.66,
1255
+ "learning_rate": 0.0002,
1256
+ "loss": 0.3755,
1257
+ "step": 208
1258
+ },
1259
+ {
1260
+ "epoch": 1.67,
1261
+ "learning_rate": 0.0002,
1262
+ "loss": 0.3904,
1263
+ "step": 209
1264
+ },
1265
+ {
1266
+ "epoch": 1.68,
1267
+ "learning_rate": 0.0002,
1268
+ "loss": 0.3413,
1269
+ "step": 210
1270
+ },
1271
+ {
1272
+ "epoch": 1.69,
1273
+ "learning_rate": 0.0002,
1274
+ "loss": 0.3568,
1275
+ "step": 211
1276
+ },
1277
+ {
1278
+ "epoch": 1.7,
1279
+ "learning_rate": 0.0002,
1280
+ "loss": 0.3333,
1281
+ "step": 212
1282
+ },
1283
+ {
1284
+ "epoch": 1.7,
1285
+ "learning_rate": 0.0002,
1286
+ "loss": 0.3481,
1287
+ "step": 213
1288
+ },
1289
+ {
1290
+ "epoch": 1.71,
1291
+ "learning_rate": 0.0002,
1292
+ "loss": 0.3571,
1293
+ "step": 214
1294
+ },
1295
+ {
1296
+ "epoch": 1.72,
1297
+ "learning_rate": 0.0002,
1298
+ "loss": 0.31,
1299
+ "step": 215
1300
+ },
1301
+ {
1302
+ "epoch": 1.73,
1303
+ "learning_rate": 0.0002,
1304
+ "loss": 0.3437,
1305
+ "step": 216
1306
+ },
1307
+ {
1308
+ "epoch": 1.74,
1309
+ "learning_rate": 0.0002,
1310
+ "loss": 0.3051,
1311
+ "step": 217
1312
+ },
1313
+ {
1314
+ "epoch": 1.74,
1315
+ "learning_rate": 0.0002,
1316
+ "loss": 0.3035,
1317
+ "step": 218
1318
+ },
1319
+ {
1320
+ "epoch": 1.75,
1321
+ "learning_rate": 0.0002,
1322
+ "loss": 0.5871,
1323
+ "step": 219
1324
+ },
1325
+ {
1326
+ "epoch": 1.76,
1327
+ "learning_rate": 0.0002,
1328
+ "loss": 0.5218,
1329
+ "step": 220
1330
+ },
1331
+ {
1332
+ "epoch": 1.77,
1333
+ "learning_rate": 0.0002,
1334
+ "loss": 0.5163,
1335
+ "step": 221
1336
+ },
1337
+ {
1338
+ "epoch": 1.78,
1339
+ "learning_rate": 0.0002,
1340
+ "loss": 0.5002,
1341
+ "step": 222
1342
+ },
1343
+ {
1344
+ "epoch": 1.78,
1345
+ "learning_rate": 0.0002,
1346
+ "loss": 0.4768,
1347
+ "step": 223
1348
+ },
1349
+ {
1350
+ "epoch": 1.79,
1351
+ "learning_rate": 0.0002,
1352
+ "loss": 0.5197,
1353
+ "step": 224
1354
+ },
1355
+ {
1356
+ "epoch": 1.8,
1357
+ "learning_rate": 0.0002,
1358
+ "loss": 0.4529,
1359
+ "step": 225
1360
  }
1361
  ],
1362
  "logging_steps": 1,
1363
  "max_steps": 250,
1364
  "num_train_epochs": 2,
1365
  "save_steps": 25,
1366
+ "total_flos": 2.907887080911667e+16,
1367
  "trial_name": null,
1368
  "trial_params": null
1369
  }