ncbateman committed on
Commit
51eebd3
·
verified ·
1 Parent(s): d3cdc25

Training in progress, step 195, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:673e0cd0d6be2cacd9fdbe1bf6f79ad95bd9fae922ec2bf88e1a8cd477cee5b9
3
  size 97307544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21d753263aa81b1b8454592a6857435334e81996e06a4ba300add9b2dfb23768
3
  size 97307544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e9ca09e7df6f39976075d85646ed22483c4e258ed5192039fbb8a16a0177402
3
  size 49846260
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9038d12d05746b9a6d82e5217c8510507bfbdc663715afa5583ab6868239418d
3
  size 49846260
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0d0addcc11bf969393dd9428fea269c1f9808caf14a24eb1d95804a407c85d4
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b359c5b6c1818e377053ca6ac8f02ef8f015748d4c9bdcdc6bf967222152a102
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e85fc8999195dd78d475e82d99b83e2dc13cd79ae7e2ade006c6bcb65bfced59
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08bf85ff0f48b3d95c6198a52e97d2ada2b8ffebab00ffbb2f9654c1bbec3d72
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.24587512131996117,
5
  "eval_steps": 386,
6
- "global_step": 190,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1345,6 +1345,41 @@
1345
  "learning_rate": 9.966721516310682e-05,
1346
  "loss": 0.9526,
1347
  "step": 190
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1348
  }
1349
  ],
1350
  "logging_steps": 1,
@@ -1364,7 +1399,7 @@
1364
  "attributes": {}
1365
  }
1366
  },
1367
- "total_flos": 2.1240806262177792e+17,
1368
  "train_batch_size": 4,
1369
  "trial_name": null,
1370
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.2523455192494338,
5
  "eval_steps": 386,
6
+ "global_step": 195,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1345
  "learning_rate": 9.966721516310682e-05,
1346
  "loss": 0.9526,
1347
  "step": 190
1348
+ },
1349
+ {
1350
+ "epoch": 0.2471692009058557,
1351
+ "grad_norm": 0.9828710556030273,
1352
+ "learning_rate": 9.966244949063316e-05,
1353
+ "loss": 0.8923,
1354
+ "step": 191
1355
+ },
1356
+ {
1357
+ "epoch": 0.24846328049175023,
1358
+ "grad_norm": 1.0729883909225464,
1359
+ "learning_rate": 9.965765005229248e-05,
1360
+ "loss": 1.0115,
1361
+ "step": 192
1362
+ },
1363
+ {
1364
+ "epoch": 0.24975736007764476,
1365
+ "grad_norm": 0.9844326972961426,
1366
+ "learning_rate": 9.965281685134796e-05,
1367
+ "loss": 0.9855,
1368
+ "step": 193
1369
+ },
1370
+ {
1371
+ "epoch": 0.2510514396635393,
1372
+ "grad_norm": 1.1593172550201416,
1373
+ "learning_rate": 9.96479498910857e-05,
1374
+ "loss": 1.0912,
1375
+ "step": 194
1376
+ },
1377
+ {
1378
+ "epoch": 0.2523455192494338,
1379
+ "grad_norm": 0.8835370540618896,
1380
+ "learning_rate": 9.964304917481482e-05,
1381
+ "loss": 0.9951,
1382
+ "step": 195
1383
  }
1384
  ],
1385
  "logging_steps": 1,
 
1399
  "attributes": {}
1400
  }
1401
  },
1402
+ "total_flos": 2.1799774848024576e+17,
1403
  "train_batch_size": 4,
1404
  "trial_name": null,
1405
  "trial_params": null