MohamedAhmedAE commited on
Commit
1b1b6f4
1 Parent(s): ffc2d10

Training in progress, step 5600, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c384964f20b4483c2dce965e9c67e9ca152e97453c6fa5c988961571c39a6be0
3
  size 167832240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a30737d4c22b4b26e4123c801ceb26aa4e876b6d95f0b62cad4d768ed46f2ce6
3
  size 167832240
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eed612f91b824abf83f39b9cc7a7625fa32c562e6a3aa09fb728224cdfad742d
3
  size 84581014
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c4b879409f22641359b928d1e77271642a5b4639b676ff813e6ec4fed297a16
3
  size 84581014
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:99397a5b0758fb455e37af8d56924ff252bf4671d377fdcae94b0d33fb047f96
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12a1c117c19e73bcd669e165d231b727aab93d182a0e0acc0ec7506cb4907381
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:462980f0e92909d0da061075e591b7887d61fb7e5de134327d9d3fd672ebeb16
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:daecd8e9a7fb8ab89776f0aa83b998d3983e03adcf54b58e801c6e6de09593a8
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.004015780530157396,
5
  "eval_steps": 2000,
6
- "global_step": 5400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -196,6 +196,13 @@
196
  "learning_rate": 1.9999968297103373e-05,
197
  "loss": 1.5909,
198
  "step": 5400
 
 
 
 
 
 
 
199
  }
200
  ],
201
  "logging_steps": 200,
@@ -203,7 +210,7 @@
203
  "num_input_tokens_seen": 0,
204
  "num_train_epochs": 5,
205
  "save_steps": 200,
206
- "total_flos": 7.091041430315827e+16,
207
  "train_batch_size": 1,
208
  "trial_name": null,
209
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.004164513142385448,
5
  "eval_steps": 2000,
6
+ "global_step": 5600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
196
  "learning_rate": 1.9999968297103373e-05,
197
  "loss": 1.5909,
198
  "step": 5400
199
+ },
200
+ {
201
+ "epoch": 0.0,
202
+ "grad_norm": 4.743027210235596,
203
+ "learning_rate": 1.999996590028264e-05,
204
+ "loss": 1.5651,
205
+ "step": 5600
206
  }
207
  ],
208
  "logging_steps": 200,
 
210
  "num_input_tokens_seen": 0,
211
  "num_train_epochs": 5,
212
  "save_steps": 200,
213
+ "total_flos": 7.351449119465472e+16,
214
  "train_batch_size": 1,
215
  "trial_name": null,
216
  "trial_params": null