bhuvanmdev commited on
Commit
bb61372
1 Parent(s): 4623331

Training in progress, step 2200, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f7865543f6aaefd525a4aa03a410cf4af463b4f3e6d2ae1912538913e26220b7
3
  size 100697728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbc4b1ee61a97280ee29a7f03d65f0615973d396283c0c1aa8a9c199445811fe
3
  size 100697728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4d22e73707901787033065e05e6edbc269297b3683cfdbd102a29997da966992
3
  size 201541754
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:012e45346ca36f5757826a56705c7bc60bbc32817df65a5df6cb0bf846ce5a5f
3
  size 201541754
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b1a9e3708d9d232a77e9549079bba51673bcb8586b714eb42370d58b85c5e90
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7cee742af04b6dcd02cfc87def2192f9af2a689dec2ee074b90937e1adbaf4e
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b259303746656c555078a221a94fa56f648be2579b7ca4ced7c5648c5502f83
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d19068f9fb75a389d67a2a6a34b0327497f2ac8cc34c17114479ba846240d9b4
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7799642218246869,
5
  "eval_steps": 500,
6
- "global_step": 2180,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1751,14 +1751,30 @@
1751
  "loss": 0.3971,
1752
  "num_input_tokens_seen": 1475721,
1753
  "step": 2180
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1754
  }
1755
  ],
1756
  "logging_steps": 10,
1757
  "max_steps": 2795,
1758
- "num_input_tokens_seen": 1475721,
1759
  "num_train_epochs": 1,
1760
  "save_steps": 20,
1761
- "total_flos": 3.318375394066637e+16,
1762
  "train_batch_size": 1,
1763
  "trial_name": null,
1764
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.7871198568872988,
5
  "eval_steps": 500,
6
+ "global_step": 2200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1751
  "loss": 0.3971,
1752
  "num_input_tokens_seen": 1475721,
1753
  "step": 2180
1754
+ },
1755
+ {
1756
+ "epoch": 0.7835420393559929,
1757
+ "grad_norm": 0.2836684584617615,
1758
+ "learning_rate": 4.329159212880143e-05,
1759
+ "loss": 0.3814,
1760
+ "num_input_tokens_seen": 1481691,
1761
+ "step": 2190
1762
+ },
1763
+ {
1764
+ "epoch": 0.7871198568872988,
1765
+ "grad_norm": 0.3486209809780121,
1766
+ "learning_rate": 4.257602862254025e-05,
1767
+ "loss": 0.4195,
1768
+ "num_input_tokens_seen": 1488275,
1769
+ "step": 2200
1770
  }
1771
  ],
1772
  "logging_steps": 10,
1773
  "max_steps": 2795,
1774
+ "num_input_tokens_seen": 1488275,
1775
  "num_train_epochs": 1,
1776
  "save_steps": 20,
1777
+ "total_flos": 3.34660490675712e+16,
1778
  "train_batch_size": 1,
1779
  "trial_name": null,
1780
  "trial_params": null