Federic committed on
Commit
1aff076
1 Parent(s): f4b6a6f

Training in progress, step 225, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a53e67f900d6f4e2bbd3b35b2f81421df59c5d763fcedb22c671941b2b5c9e5
3
  size 536906096
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c78534400a8b1c3581b037edd215980d243e75564ee8c4dab60c5e0b53ebd30
3
  size 536906096
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8755246a4ea026b83dd138f2d34b63b1e024ea178cbd41ff82d4f069a99c1642
3
  size 269267284
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32d3d33718eff1a679f0a84edb0fd03bba3b66aa951fd597d58d8897db54c356
3
  size 269267284
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb19b4241f3fa973ce278cd7e299bf5c1382f2c8baefb52b4a8b59ee6a38edb4
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3727d398dfb2cbe7ff900b8e204a6b14ee39eccffb766cfcfe7798fdd0682e50
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:859ff0676471245c9481ca25d6d6778d1c7963c39b7877af46bb8ca30a9ead21
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f59a103009f3230e51c40288ef6a33247523fa398934878b1e22a81660cbade8
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.8,
5
  "eval_steps": 500,
6
- "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1207,13 +1207,163 @@
1207
  "learning_rate": 0.0002,
1208
  "loss": 0.3175,
1209
  "step": 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1210
  }
1211
  ],
1212
  "logging_steps": 1,
1213
  "max_steps": 250,
1214
  "num_train_epochs": 1,
1215
  "save_steps": 25,
1216
- "total_flos": 1.553804770000896e+16,
1217
  "trial_name": null,
1218
  "trial_params": null
1219
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9,
5
  "eval_steps": 500,
6
+ "global_step": 225,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1207
  "learning_rate": 0.0002,
1208
  "loss": 0.3175,
1209
  "step": 200
1210
+ },
1211
+ {
1212
+ "epoch": 0.8,
1213
+ "learning_rate": 0.0002,
1214
+ "loss": 0.5803,
1215
+ "step": 201
1216
+ },
1217
+ {
1218
+ "epoch": 0.81,
1219
+ "learning_rate": 0.0002,
1220
+ "loss": 0.4885,
1221
+ "step": 202
1222
+ },
1223
+ {
1224
+ "epoch": 0.81,
1225
+ "learning_rate": 0.0002,
1226
+ "loss": 0.5339,
1227
+ "step": 203
1228
+ },
1229
+ {
1230
+ "epoch": 0.82,
1231
+ "learning_rate": 0.0002,
1232
+ "loss": 0.5641,
1233
+ "step": 204
1234
+ },
1235
+ {
1236
+ "epoch": 0.82,
1237
+ "learning_rate": 0.0002,
1238
+ "loss": 0.5516,
1239
+ "step": 205
1240
+ },
1241
+ {
1242
+ "epoch": 0.82,
1243
+ "learning_rate": 0.0002,
1244
+ "loss": 0.4112,
1245
+ "step": 206
1246
+ },
1247
+ {
1248
+ "epoch": 0.83,
1249
+ "learning_rate": 0.0002,
1250
+ "loss": 0.4119,
1251
+ "step": 207
1252
+ },
1253
+ {
1254
+ "epoch": 0.83,
1255
+ "learning_rate": 0.0002,
1256
+ "loss": 0.4461,
1257
+ "step": 208
1258
+ },
1259
+ {
1260
+ "epoch": 0.84,
1261
+ "learning_rate": 0.0002,
1262
+ "loss": 0.4085,
1263
+ "step": 209
1264
+ },
1265
+ {
1266
+ "epoch": 0.84,
1267
+ "learning_rate": 0.0002,
1268
+ "loss": 0.4434,
1269
+ "step": 210
1270
+ },
1271
+ {
1272
+ "epoch": 0.84,
1273
+ "learning_rate": 0.0002,
1274
+ "loss": 0.4333,
1275
+ "step": 211
1276
+ },
1277
+ {
1278
+ "epoch": 0.85,
1279
+ "learning_rate": 0.0002,
1280
+ "loss": 0.4078,
1281
+ "step": 212
1282
+ },
1283
+ {
1284
+ "epoch": 0.85,
1285
+ "learning_rate": 0.0002,
1286
+ "loss": 0.445,
1287
+ "step": 213
1288
+ },
1289
+ {
1290
+ "epoch": 0.86,
1291
+ "learning_rate": 0.0002,
1292
+ "loss": 0.4662,
1293
+ "step": 214
1294
+ },
1295
+ {
1296
+ "epoch": 0.86,
1297
+ "learning_rate": 0.0002,
1298
+ "loss": 0.4194,
1299
+ "step": 215
1300
+ },
1301
+ {
1302
+ "epoch": 0.86,
1303
+ "learning_rate": 0.0002,
1304
+ "loss": 0.4517,
1305
+ "step": 216
1306
+ },
1307
+ {
1308
+ "epoch": 0.87,
1309
+ "learning_rate": 0.0002,
1310
+ "loss": 0.4191,
1311
+ "step": 217
1312
+ },
1313
+ {
1314
+ "epoch": 0.87,
1315
+ "learning_rate": 0.0002,
1316
+ "loss": 0.4019,
1317
+ "step": 218
1318
+ },
1319
+ {
1320
+ "epoch": 0.88,
1321
+ "learning_rate": 0.0002,
1322
+ "loss": 0.4391,
1323
+ "step": 219
1324
+ },
1325
+ {
1326
+ "epoch": 0.88,
1327
+ "learning_rate": 0.0002,
1328
+ "loss": 0.4271,
1329
+ "step": 220
1330
+ },
1331
+ {
1332
+ "epoch": 0.88,
1333
+ "learning_rate": 0.0002,
1334
+ "loss": 0.4275,
1335
+ "step": 221
1336
+ },
1337
+ {
1338
+ "epoch": 0.89,
1339
+ "learning_rate": 0.0002,
1340
+ "loss": 0.3938,
1341
+ "step": 222
1342
+ },
1343
+ {
1344
+ "epoch": 0.89,
1345
+ "learning_rate": 0.0002,
1346
+ "loss": 0.4537,
1347
+ "step": 223
1348
+ },
1349
+ {
1350
+ "epoch": 0.9,
1351
+ "learning_rate": 0.0002,
1352
+ "loss": 0.4199,
1353
+ "step": 224
1354
+ },
1355
+ {
1356
+ "epoch": 0.9,
1357
+ "learning_rate": 0.0002,
1358
+ "loss": 0.4072,
1359
+ "step": 225
1360
  }
1361
  ],
1362
  "logging_steps": 1,
1363
  "max_steps": 250,
1364
  "num_train_epochs": 1,
1365
  "save_steps": 25,
1366
+ "total_flos": 1.768010525835264e+16,
1367
  "trial_name": null,
1368
  "trial_params": null
1369
  }