ncbateman commited on
Commit
86dbdc2
·
verified ·
1 Parent(s): bbdb88c

Training in progress, step 405, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b5849099de6d64da9163882e434d47ddd3f52875f754b124b878b5d8d76b11b
3
  size 97307544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3ba083b80a5ba86c78573397f43c5755304ed3a937a891778f373046687e85a
3
  size 97307544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f1fedb2f50db670c47275e3ef8f58cd862a8dd1891a81406d1d6e8dbb0ca662b
3
  size 49846644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f7bdba3f023857413f670e1a267e69714b283f392e2ead25387966136304530
3
  size 49846644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e510d47805713e04f667951f9cdc1d488606c58294839c2a021b9c3bb3b8555
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87c82db480424801d4bb33b59a50589e1c04688890a2475f7053ae263b3756f4
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc533cf1bb7177235d15f56f9dac5f23ac9d59fa4a64c89a32769793623a6b44
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33cf1fc2a7a88cf2a60a50e566aaa7e8972cd330e2ff6eb55fa333ccfaf32fd5
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.517631834357813,
5
  "eval_steps": 386,
6
- "global_step": 400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2823,6 +2823,41 @@
2823
  "learning_rate": 9.793219548476753e-05,
2824
  "loss": 0.8706,
2825
  "step": 400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2826
  }
2827
  ],
2828
  "logging_steps": 1,
@@ -2842,7 +2877,7 @@
2842
  "attributes": {}
2843
  }
2844
  },
2845
- "total_flos": 4.471748686774272e+17,
2846
  "train_batch_size": 4,
2847
  "trial_name": null,
2848
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.5241022322872857,
5
  "eval_steps": 386,
6
+ "global_step": 405,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2823
  "learning_rate": 9.793219548476753e-05,
2824
  "loss": 0.8706,
2825
  "step": 400
2826
+ },
2827
+ {
2828
+ "epoch": 0.5189259139437076,
2829
+ "grad_norm": 0.893902063369751,
2830
+ "learning_rate": 9.792044529138674e-05,
2831
+ "loss": 0.8217,
2832
+ "step": 401
2833
+ },
2834
+ {
2835
+ "epoch": 0.520219993529602,
2836
+ "grad_norm": 0.7412934899330139,
2837
+ "learning_rate": 9.79086625165405e-05,
2838
+ "loss": 0.868,
2839
+ "step": 402
2840
+ },
2841
+ {
2842
+ "epoch": 0.5215140731154966,
2843
+ "grad_norm": 0.796435534954071,
2844
+ "learning_rate": 9.789684716823995e-05,
2845
+ "loss": 0.8691,
2846
+ "step": 403
2847
+ },
2848
+ {
2849
+ "epoch": 0.5228081527013911,
2850
+ "grad_norm": 1.063193440437317,
2851
+ "learning_rate": 9.788499925451849e-05,
2852
+ "loss": 1.0085,
2853
+ "step": 404
2854
+ },
2855
+ {
2856
+ "epoch": 0.5241022322872857,
2857
+ "grad_norm": 0.952882707118988,
2858
+ "learning_rate": 9.787311878343157e-05,
2859
+ "loss": 0.8378,
2860
+ "step": 405
2861
  }
2862
  ],
2863
  "logging_steps": 1,
 
2877
  "attributes": {}
2878
  }
2879
  },
2880
+ "total_flos": 4.5276455453589504e+17,
2881
  "train_batch_size": 4,
2882
  "trial_name": null,
2883
  "trial_params": null