MohamedAhmedAE committed
Commit 2ec4c28
1 Parent(s): 205392b

Training in progress, step 8800, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:4533d0cc5d4d50b82cfb5df3d5e25daabf3844635d37d8a1136dc9efe3ae8cbf
+ oid sha256:a0be03972159ec159a56e3e77ca8d38e2168af9dcdd4df0d68963ed1c60e6965
  size 167832240
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d5c5d42eb180a29ed66f0b671c8c70e4743c5ad821b1415728c0f72958e41f51
+ oid sha256:9dd5bd046dc3465f6df5b72e0a2552c5b0fda402004d2a228232c184835c93f0
  size 84581014
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:46108bf4b002a3517fd8a49b94b3b4ed59fda2273b5a44b5a4115826d33fd2b4
+ oid sha256:85904f7c1b5e0d81333d46dd0befeb978507214982bf15efdfa380ed0f5f6789
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:bdd0fb75028b56249c2d8c029d1a64d4b86a271bd278ff63d1486066e1925aa9
+ oid sha256:7a757a49f50d9542b04e522a04786aa578e7821acc2014c546ec09d428c28168
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
    "best_metric": null,
    "best_model_checkpoint": null,
-   "epoch": 0.006395502325806224,
+   "epoch": 0.006544234938034276,
    "eval_steps": 2000,
-   "global_step": 8600,
+   "global_step": 8800,
    "is_hyper_param_search": false,
    "is_local_process_zero": true,
    "is_world_process_zero": true,
@@ -308,6 +308,13 @@
        "learning_rate": 1.99999194681957e-05,
        "loss": 1.5581,
        "step": 8600
+     },
+     {
+       "epoch": 0.01,
+       "grad_norm": 2.273376703262329,
+       "learning_rate": 1.9999915674072735e-05,
+       "loss": 1.6099,
+       "step": 8800
      }
    ],
    "logging_steps": 200,
@@ -315,7 +322,7 @@
    "num_input_tokens_seen": 0,
    "num_train_epochs": 5,
    "save_steps": 200,
-   "total_flos": 1.1254267984893542e+17,
+   "total_flos": 1.1521698789135974e+17,
    "train_batch_size": 1,
    "trial_name": null,
    "trial_params": null