MohamedAhmedAE commited on
Commit
6a89fa7
1 Parent(s): df8d142

Training in progress, step 5200, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d3898deded357ae3f624f6da11d5a240cdabb03ebb6c20aaa1514ff2afc28f42
3
  size 167832240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a32a8472f24ac0ae5777096edb75d3f2a4dcd1c6f71a5a93bc0a5ae4a7fe3dcf
3
  size 167832240
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b6589201aabf4474a11e89f074f30710b9c14f130975414126d44fac37ab285
3
  size 84581014
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3289ab88db5dbcb3be2060676922612848d1ca284d404ff24a82083e0a9f0264
3
  size 84581014
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c2a46d21812f033b59f8253d10c55fd001488b63093ce2c9a28dd3b2087601c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49f463b5f7795e26b9e24462f96b39a52b6b97b5adfc046ad7d24a04ed313a17
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dadb6c8212cca45778812f2e418e14e8bb1e227cbf578c8c3f0b8b32adfef7b7
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac1bdf201c735a2a3a0e28881f6c25b745bd1b24bb6cddaa05404a7fc4dbfa5a
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.0037183153057012927,
5
  "eval_steps": 2000,
6
- "global_step": 5000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -182,6 +182,13 @@
182
  "learning_rate": 1.999997281785647e-05,
183
  "loss": 1.5405,
184
  "step": 5000
 
 
 
 
 
 
 
185
  }
186
  ],
187
  "logging_steps": 200,
@@ -189,7 +196,7 @@
189
  "num_input_tokens_seen": 0,
190
  "num_train_epochs": 5,
191
  "save_steps": 200,
192
- "total_flos": 6.541947938699674e+16,
193
  "train_batch_size": 1,
194
  "trial_name": null,
195
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.0038670479179293447,
5
  "eval_steps": 2000,
6
+ "global_step": 5200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
182
  "learning_rate": 1.999997281785647e-05,
183
  "loss": 1.5405,
184
  "step": 5000
185
+ },
186
+ {
187
+ "epoch": 0.0,
188
+ "grad_norm": 2.7667906284332275,
189
+ "learning_rate": 1.9999970595262297e-05,
190
+ "loss": 1.5714,
191
+ "step": 5200
192
  }
193
  ],
194
  "logging_steps": 200,
 
196
  "num_input_tokens_seen": 0,
197
  "num_train_epochs": 5,
198
  "save_steps": 200,
199
+ "total_flos": 6.809650430179738e+16,
200
  "train_batch_size": 1,
201
  "trial_name": null,
202
  "trial_params": null