ncbateman commited on
Commit
11245d2
1 Parent(s): 5f9bce1

Training in progress, step 230, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18fdbc0c4f0a856bb851756d41e848c7f8da8cdd3c8bb06d2d6bed157cd6e47f
3
  size 97307544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9dc04d9131f70be891a19381878614c297c393a909fdfe2b429dde61e4fcefa
3
  size 97307544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:593b4b5069ea3e92bd5dd2bd90ebd76f7ec8bccb3efe79a2301c144ab29a07b4
3
  size 49846260
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0cd8f2a46be5f132346e24f7e0d29bb24f82edd72eb9fee14693d4eecfd4237
3
  size 49846260
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:74a5cf5e988307b7bc5ac99195c5626f83509282efb2453403481b5a9c2074a3
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afe870df998feb02da0faa0e10164a513510cb4301dca9513fbf4cb6ab260322
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70b35dc00d38fbc94393b01bb750de67a9e73be3bb058ea334b16afbeab55729
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:406b12b84118dfa4a046518d4e2a656e48bb88fb0ae0e36c6b540e90cacbc244
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.2911679068262698,
5
  "eval_steps": 386,
6
- "global_step": 225,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1590,6 +1590,41 @@
1590
  "learning_rate": 9.948034849431831e-05,
1591
  "loss": 0.939,
1592
  "step": 225
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1593
  }
1594
  ],
1595
  "logging_steps": 1,
@@ -1609,7 +1644,7 @@
1609
  "attributes": {}
1610
  }
1611
  },
1612
- "total_flos": 2.515358636310528e+17,
1613
  "train_batch_size": 4,
1614
  "trial_name": null,
1615
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.2976383047557425,
5
  "eval_steps": 386,
6
+ "global_step": 230,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1590
  "learning_rate": 9.948034849431831e-05,
1591
  "loss": 0.939,
1592
  "step": 225
1593
+ },
1594
+ {
1595
+ "epoch": 0.29246198641216437,
1596
+ "grad_norm": 0.9793195724487305,
1597
+ "learning_rate": 9.947440310820852e-05,
1598
+ "loss": 1.0998,
1599
+ "step": 226
1600
+ },
1601
+ {
1602
+ "epoch": 0.29375606599805887,
1603
+ "grad_norm": 0.8190125823020935,
1604
+ "learning_rate": 9.946842408408583e-05,
1605
+ "loss": 0.9606,
1606
+ "step": 227
1607
+ },
1608
+ {
1609
+ "epoch": 0.2950501455839534,
1610
+ "grad_norm": 0.8229602575302124,
1611
+ "learning_rate": 9.946241142601543e-05,
1612
+ "loss": 0.7944,
1613
+ "step": 228
1614
+ },
1615
+ {
1616
+ "epoch": 0.29634422516984793,
1617
+ "grad_norm": 0.8640865683555603,
1618
+ "learning_rate": 9.945636513808537e-05,
1619
+ "loss": 1.112,
1620
+ "step": 229
1621
+ },
1622
+ {
1623
+ "epoch": 0.2976383047557425,
1624
+ "grad_norm": 0.774501621723175,
1625
+ "learning_rate": 9.945028522440653e-05,
1626
+ "loss": 0.8986,
1627
+ "step": 230
1628
  }
1629
  ],
1630
  "logging_steps": 1,
 
1644
  "attributes": {}
1645
  }
1646
  },
1647
+ "total_flos": 2.5712554948952064e+17,
1648
  "train_batch_size": 4,
1649
  "trial_name": null,
1650
  "trial_params": null