rakhman-llm committed
Commit 8c6f012
1 Parent(s): 6956e41

Training in progress, step 13500, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:96bd68ad012b25619d7bf581e4457855b3766a688abc4f029e6202ce077b6816
+ oid sha256:04c41971b4867deed95d379b57ad2e5a011ca77199c82e5fee2f2ffccccb2412
  size 891558696
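
The pointer above records only the object's sha256 and byte size; the weights themselves are fetched through Git LFS. A minimal sketch of checking a downloaded file against the new oid, assuming the checkpoint directory has been pulled locally (the path is taken from the file name above):

import hashlib

def sha256_of(path):
    # Stream the file in 1 MiB chunks so the ~891 MB weights file
    # is never held in memory all at once.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest()

# New oid recorded by this commit for model.safetensors.
expected = "04c41971b4867deed95d379b57ad2e5a011ca77199c82e5fee2f2ffccccb2412"
assert sha256_of("last-checkpoint/model.safetensors") == expected
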
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:0b754627dd5bef36f869f0c293f197d1d9dab012f8b88d73f786ad25764d9c19
+ oid sha256:6292782a68d81556401e2d237dfad643f2da1f77403214f8d4e613c6a76b21df
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5e9bd9c0c5f3829618d66a8ccc40a2ee0bf94db351e00ed3ee919d3ea07ee90c
+ oid sha256:5c0464ef7b8033abc039566dee85b9115e703e8f7542cc7dba40663c87de21b6
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:589ca601b575cfc7f004c136eb91b382b9a1be92a7e3c7f68f79df4414805284
+ oid sha256:575df2908db98862a8edb86a2879682fc9cbe014aa13a1c0e8845ea12be8611c
  size 1064
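
Together with trainer_state.json below, these four files form a standard transformers Trainer checkpoint: model weights, optimizer state, LR scheduler state, and RNG state. A minimal sketch of opening the main pieces for inspection, assuming the last-checkpoint directory has been downloaded locally:

import torch
from safetensors.torch import load_file

ckpt = "last-checkpoint"

# Model weights: safetensors stores a flat name -> tensor mapping,
# with no pickled Python objects.
weights = load_file(f"{ckpt}/model.safetensors")
print(sum(t.numel() for t in weights.values()), "parameters")

# Optimizer and LR scheduler state are ordinary torch pickles;
# rng_state.pth holds the saved RNG states and can be loaded the same way.
optimizer_state = torch.load(f"{ckpt}/optimizer.pt", map_location="cpu")
scheduler_state = torch.load(f"{ckpt}/scheduler.pt", map_location="cpu")

When a run is resumed with the Trainer's resume_from_checkpoint option, these are the files it reloads, so optimization continues from step 13500 rather than restarting.
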
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
    "best_metric": null,
    "best_model_checkpoint": null,
-   "epoch": 2.7015793848711556,
+   "epoch": 2.8054862842892767,
    "eval_steps": 500,
-   "global_step": 13000,
+   "global_step": 13500,
    "is_hyper_param_search": false,
    "is_local_process_zero": true,
    "is_world_process_zero": true,
@@ -205,6 +205,13 @@
        "learning_rate": 1.99778331947908e-06,
        "loss": 0.3389,
        "step": 13000
+     },
+     {
+       "epoch": 2.8054862842892767,
+       "grad_norm": 1.6529736518859863,
+       "learning_rate": 1.3050706566916044e-06,
+       "loss": 0.3163,
+       "step": 13500
      }
    ],
    "logging_steps": 500,
@@ -224,7 +231,7 @@
        "attributes": {}
      }
    },
-   "total_flos": 1.583168723877888e+16,
+   "total_flos": 1.644064512933888e+16,
    "train_batch_size": 2,
    "trial_name": null,
    "trial_params": null