ncbateman commited on
Commit
e0b50ce
1 Parent(s): 368244d

Training in progress, step 705, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1aabb06c1fe37ff25379e382b1f17082b07d34182a4fc2da6ffae8579ac4675e
3
  size 97307544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:276cc98327297733df76553f5e3b6d4860107114f0c0d09e76850d88f2e3e792
3
  size 97307544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d75409d4f4ef99a18e288d75b5c709cef3c171c10c32cf11b3494ebb2d324c6
3
  size 49846644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67e1bd4b745e410efa1702b658de1ecd4ec95ec3e117bd275ec883dc33e4fe8f
3
  size 49846644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db4a5aa1d65732ea6e6ad6ffbd33a4afe19476644f2bd043f99022469dab6bc0
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2d3d37bb63f7049bbb3b5a0e021e4d5be2c3c48a17668f2b1c66f33aefe4ecd
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e88a0b12b39fadaa49a1f55d69192330694c5d8626f92166735ca7ee1b34dd9e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f8030d97101266fb0ab5c1ea76d734a66ed4913265b7bd97b3ca554de2e26fd
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9058557101261727,
5
  "eval_steps": 386,
6
- "global_step": 700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4923,6 +4923,41 @@
4923
  "learning_rate": 9.298875414358399e-05,
4924
  "loss": 0.9095,
4925
  "step": 700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4926
  }
4927
  ],
4928
  "logging_steps": 1,
@@ -4942,7 +4977,7 @@
4942
  "attributes": {}
4943
  }
4944
  },
4945
- "total_flos": 7.825560201854976e+17,
4946
  "train_batch_size": 4,
4947
  "trial_name": null,
4948
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9123261080556454,
5
  "eval_steps": 386,
6
+ "global_step": 705,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4923
  "learning_rate": 9.298875414358399e-05,
4924
  "loss": 0.9095,
4925
  "step": 700
4926
+ },
4927
+ {
4928
+ "epoch": 0.9071497897120673,
4929
+ "grad_norm": 0.6941442489624023,
4930
+ "learning_rate": 9.296768538192853e-05,
4931
+ "loss": 0.8285,
4932
+ "step": 701
4933
+ },
4934
+ {
4935
+ "epoch": 0.9084438692979618,
4936
+ "grad_norm": 0.7753114700317383,
4937
+ "learning_rate": 9.294658740622573e-05,
4938
+ "loss": 0.9726,
4939
+ "step": 702
4940
+ },
4941
+ {
4942
+ "epoch": 0.9097379488838564,
4943
+ "grad_norm": 0.8224105834960938,
4944
+ "learning_rate": 9.292546023082025e-05,
4945
+ "loss": 0.8728,
4946
+ "step": 703
4947
+ },
4948
+ {
4949
+ "epoch": 0.9110320284697508,
4950
+ "grad_norm": 0.9305656552314758,
4951
+ "learning_rate": 9.29043038700766e-05,
4952
+ "loss": 0.891,
4953
+ "step": 704
4954
+ },
4955
+ {
4956
+ "epoch": 0.9123261080556454,
4957
+ "grad_norm": 0.7868937849998474,
4958
+ "learning_rate": 9.288311833837917e-05,
4959
+ "loss": 0.8883,
4960
+ "step": 705
4961
  }
4962
  ],
4963
  "logging_steps": 1,
 
4977
  "attributes": {}
4978
  }
4979
  },
4980
+ "total_flos": 7.881457060439654e+17,
4981
  "train_batch_size": 4,
4982
  "trial_name": null,
4983
  "trial_params": null