ncbateman commited on
Commit
41ae948
·
verified ·
1 Parent(s): c2125d6

Training in progress, step 255, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1281b650194f1fbbf19c77dce0688ee14dcd633838ccfe32e26ddcbfabc606a
3
  size 97307544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4abad106b26a177c80a55998c565b87e8d8e322882f0ee0d82dfa1bc33ce78be
3
  size 97307544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20441cfd1a765ce19bd0c3aa3fa3f6568100e50441051ecd815972d170df6441
3
  size 49846260
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15391496ce979fc31bfa4b89cf7a1b8ef5dc88ae859e2b82b3f5c1003f85f097
3
  size 49846260
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1242a8006a137d7d7fdf5051a8f1d3180b356281e9f9b85b1f07ab3614b81f01
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92f080d6a96737273ff3f2c3aa9a0b978c372496e285abadb5ac87a0c88bd369
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e258cdfd2c36c8c2f0b10b58594d1a291f5e6c49df7a4537b7e3a4fcef2b8c0b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:727d568946636c509efe7945204da61e120d33ba9eb30256473171e8dfb29ed3
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.3235198964736331,
5
  "eval_steps": 386,
6
- "global_step": 250,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1765,6 +1765,41 @@
1765
  "learning_rate": 9.93216320767545e-05,
1766
  "loss": 0.8878,
1767
  "step": 250
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1768
  }
1769
  ],
1770
  "logging_steps": 1,
@@ -1784,7 +1819,7 @@
1784
  "attributes": {}
1785
  }
1786
  },
1787
- "total_flos": 2.79484292923392e+17,
1788
  "train_batch_size": 4,
1789
  "trial_name": null,
1790
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.3299902944031058,
5
  "eval_steps": 386,
6
+ "global_step": 255,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1765
  "learning_rate": 9.93216320767545e-05,
1766
  "loss": 0.8878,
1767
  "step": 250
1768
+ },
1769
+ {
1770
+ "epoch": 0.32481397605952766,
1771
+ "grad_norm": 0.8709661960601807,
1772
+ "learning_rate": 9.93148470124375e-05,
1773
+ "loss": 0.8786,
1774
+ "step": 251
1775
+ },
1776
+ {
1777
+ "epoch": 0.32610805564542217,
1778
+ "grad_norm": 0.9593453407287598,
1779
+ "learning_rate": 9.930802841859095e-05,
1780
+ "loss": 1.0659,
1781
+ "step": 252
1782
+ },
1783
+ {
1784
+ "epoch": 0.3274021352313167,
1785
+ "grad_norm": 1.0130974054336548,
1786
+ "learning_rate": 9.93011762998509e-05,
1787
+ "loss": 0.9626,
1788
+ "step": 253
1789
+ },
1790
+ {
1791
+ "epoch": 0.3286962148172113,
1792
+ "grad_norm": 0.9949910640716553,
1793
+ "learning_rate": 9.929429066087616e-05,
1794
+ "loss": 1.0499,
1795
+ "step": 254
1796
+ },
1797
+ {
1798
+ "epoch": 0.3299902944031058,
1799
+ "grad_norm": 0.927542507648468,
1800
+ "learning_rate": 9.92873715063483e-05,
1801
+ "loss": 0.9571,
1802
+ "step": 255
1803
  }
1804
  ],
1805
  "logging_steps": 1,
 
1819
  "attributes": {}
1820
  }
1821
  },
1822
+ "total_flos": 2.8507397878185984e+17,
1823
  "train_batch_size": 4,
1824
  "trial_name": null,
1825
  "trial_params": null