aseratus1 committed on
Commit
8d7a6e2
·
verified ·
1 Parent(s): 2470af8

Training in progress, step 1800, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:38ff59f65b26f1e229a4f3d4c73b0fefd04c74898553389ea45285c7c03e44fb
3
  size 671149168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85e870d258db6d7abf45de988d9b70a0281bfdb3ea7c9c4bd38d1025f2f8cd97
3
  size 671149168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0e68c2fd3fc08f26f68cc6ea31f89eb8432170046fa27022f45200b45830bbfd
3
  size 341314644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13becd55a25b6054e7f90c99303bb145437a76d281c497ac37de934bd74fa6b2
3
  size 341314644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac19d5d69213bd6ec3ac71c1dc1e56a89e4422a2146959318340f8687a25d0a0
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d566d592391e3821922acc3010e303f9cd9b68e7755f6bf2181b073b99bba9c1
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f26152095eec0fefdbf64011d027c8b7130d36103664f806cba7c1c0501a24f2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b88c69723b71d2878f68c032ece7cd75c665849011bece10855960b5f5a1426
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.4205625653266907,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-1650",
4
- "epoch": 0.6255924170616114,
5
  "eval_steps": 150,
6
- "global_step": 1650,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1258,6 +1258,119 @@
1258
  "eval_samples_per_second": 21.01,
1259
  "eval_steps_per_second": 5.255,
1260
  "step": 1650
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1261
  }
1262
  ],
1263
  "logging_steps": 10,
@@ -1286,7 +1399,7 @@
1286
  "attributes": {}
1287
  }
1288
  },
1289
- "total_flos": 1.464578120764883e+18,
1290
  "train_batch_size": 8,
1291
  "trial_name": null,
1292
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.41478267312049866,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-1800",
4
+ "epoch": 0.6824644549763034,
5
  "eval_steps": 150,
6
+ "global_step": 1800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1258
  "eval_samples_per_second": 21.01,
1259
  "eval_steps_per_second": 5.255,
1260
  "step": 1650
1261
+ },
1262
+ {
1263
+ "epoch": 0.6293838862559241,
1264
+ "grad_norm": 0.7050827741622925,
1265
+ "learning_rate": 3.128717968263484e-05,
1266
+ "loss": 0.6622,
1267
+ "step": 1660
1268
+ },
1269
+ {
1270
+ "epoch": 0.633175355450237,
1271
+ "grad_norm": 0.7003112435340881,
1272
+ "learning_rate": 3.072572866936939e-05,
1273
+ "loss": 0.4541,
1274
+ "step": 1670
1275
+ },
1276
+ {
1277
+ "epoch": 0.6369668246445498,
1278
+ "grad_norm": 0.7300443053245544,
1279
+ "learning_rate": 3.016711782284058e-05,
1280
+ "loss": 0.4091,
1281
+ "step": 1680
1282
+ },
1283
+ {
1284
+ "epoch": 0.6407582938388625,
1285
+ "grad_norm": 0.6804115176200867,
1286
+ "learning_rate": 2.9611429457337613e-05,
1287
+ "loss": 0.3318,
1288
+ "step": 1690
1289
+ },
1290
+ {
1291
+ "epoch": 0.6445497630331753,
1292
+ "grad_norm": 0.6225182414054871,
1293
+ "learning_rate": 2.905874545650656e-05,
1294
+ "loss": 0.2427,
1295
+ "step": 1700
1296
+ },
1297
+ {
1298
+ "epoch": 0.6483412322274882,
1299
+ "grad_norm": 0.7422319650650024,
1300
+ "learning_rate": 2.8509147261284287e-05,
1301
+ "loss": 0.6641,
1302
+ "step": 1710
1303
+ },
1304
+ {
1305
+ "epoch": 0.6521327014218009,
1306
+ "grad_norm": 0.7763936519622803,
1307
+ "learning_rate": 2.796271585789778e-05,
1308
+ "loss": 0.4526,
1309
+ "step": 1720
1310
+ },
1311
+ {
1312
+ "epoch": 0.6559241706161137,
1313
+ "grad_norm": 0.6862651705741882,
1314
+ "learning_rate": 2.7419531765930324e-05,
1315
+ "loss": 0.3974,
1316
+ "step": 1730
1317
+ },
1318
+ {
1319
+ "epoch": 0.6597156398104266,
1320
+ "grad_norm": 0.6718897223472595,
1321
+ "learning_rate": 2.6879675026456553e-05,
1322
+ "loss": 0.3427,
1323
+ "step": 1740
1324
+ },
1325
+ {
1326
+ "epoch": 0.6635071090047393,
1327
+ "grad_norm": 0.5931557416915894,
1328
+ "learning_rate": 2.634322519024791e-05,
1329
+ "loss": 0.2467,
1330
+ "step": 1750
1331
+ },
1332
+ {
1333
+ "epoch": 0.6672985781990521,
1334
+ "grad_norm": 0.8312835097312927,
1335
+ "learning_rate": 2.58102613060505e-05,
1336
+ "loss": 0.6196,
1337
+ "step": 1760
1338
+ },
1339
+ {
1340
+ "epoch": 0.671090047393365,
1341
+ "grad_norm": 0.7139614224433899,
1342
+ "learning_rate": 2.5280861908936843e-05,
1343
+ "loss": 0.4495,
1344
+ "step": 1770
1345
+ },
1346
+ {
1347
+ "epoch": 0.6748815165876777,
1348
+ "grad_norm": 0.7517569065093994,
1349
+ "learning_rate": 2.4755105008733154e-05,
1350
+ "loss": 0.4067,
1351
+ "step": 1780
1352
+ },
1353
+ {
1354
+ "epoch": 0.6786729857819905,
1355
+ "grad_norm": 0.7141692638397217,
1356
+ "learning_rate": 2.4233068078524375e-05,
1357
+ "loss": 0.3508,
1358
+ "step": 1790
1359
+ },
1360
+ {
1361
+ "epoch": 0.6824644549763034,
1362
+ "grad_norm": 0.739683985710144,
1363
+ "learning_rate": 2.371482804323798e-05,
1364
+ "loss": 0.2417,
1365
+ "step": 1800
1366
+ },
1367
+ {
1368
+ "epoch": 0.6824644549763034,
1369
+ "eval_loss": 0.41478267312049866,
1370
+ "eval_runtime": 211.5519,
1371
+ "eval_samples_per_second": 20.997,
1372
+ "eval_steps_per_second": 5.252,
1373
+ "step": 1800
1374
  }
1375
  ],
1376
  "logging_steps": 10,
 
1399
  "attributes": {}
1400
  }
1401
  },
1402
+ "total_flos": 1.5975158798918615e+18,
1403
  "train_batch_size": 8,
1404
  "trial_name": null,
1405
  "trial_params": null