rakhman-llm committed
Commit 8b94d72
1 Parent(s): d9036d5

Training in progress, step 12500, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bd435d4c565278ca4fe642f0569217eae31db204692f8170a78cc060584eb864
+oid sha256:4ba4172139359a1d497324e8e22e9a06ebeca96155aa0037a8e9fe2bd0bb1861
 size 891558696
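The *.safetensors and *.pt entries in this commit are Git LFS pointer files: only the sha256 oid changes because the checkpoint contents were updated, while the size stays the same since the model shape is unchanged. A minimal verification sketch, assuming the checkpoint directory has been pulled locally; the path and the expected oid come from the new pointer above:

import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    # Stream the file in 1 MiB chunks and return its hex sha256 digest.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

expected = "4ba4172139359a1d497324e8e22e9a06ebeca96155aa0037a8e9fe2bd0bb1861"  # new oid above
print(sha256_of("last-checkpoint/model.safetensors") == expected)

The same check applies to the other LFS-tracked files below, each against its own new oid.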
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:15f1632c64d76783d8907c3e92a7713d23c2df60b1e1a3079d9b3c7be3413a9f
+oid sha256:669022ae6cb17619adcdc439fe592ba5a330da40a4b03a681212031e9d3c7b25
 size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a4f26f933cd5a30682ad42a3de6cd890f8aaa49723c34ab9ccdf6d24b30e628d
+oid sha256:6f3dfe1e2f7f2fd5854887c0faffeef0e864d418e8da96df789b7b060dfeefbe
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a5d59de19a41db12fa634f2104bc781b1687cd88f74eced8badd275d850b3566
+oid sha256:5db0dbd26a91dafa09171d3a40d283c5443302ab9d18cc5d9752c39b01fbce28
 size 1064
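Together, the four files above plus trainer_state.json form a standard transformers Trainer checkpoint (weights, optimizer state, RNG state, LR scheduler state). A minimal inspection sketch, assuming last-checkpoint/ has been downloaded locally (e.g. via git lfs pull); the key names noted in the comments are typical for torch state dicts, not taken from this repository:

import torch
from safetensors.torch import load_file

weights = load_file("last-checkpoint/model.safetensors")  # tensors only, no pickled code
optimizer_state = torch.load("last-checkpoint/optimizer.pt", map_location="cpu", weights_only=False)
scheduler_state = torch.load("last-checkpoint/scheduler.pt", map_location="cpu", weights_only=False)

num_params = sum(t.numel() for t in weights.values())
print(f"{len(weights)} tensors, {num_params:,} parameters")
print("optimizer keys:", list(optimizer_state.keys()))   # usually 'state' and 'param_groups'
print("scheduler keys:", list(scheduler_state.keys()))

To continue training rather than just inspect, the usual route is trainer.train(resume_from_checkpoint="last-checkpoint"), which restores the optimizer, scheduler, and RNG state alongside the weights.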
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.493765586034913,
+  "epoch": 2.597672485453034,
   "eval_steps": 500,
-  "global_step": 12000,
+  "global_step": 12500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -191,6 +191,13 @@
       "learning_rate": 3.3832086450540318e-06,
       "loss": 0.317,
       "step": 12000
+    },
+    {
+      "epoch": 2.597672485453034,
+      "grad_norm": 1.4168757200241089,
+      "learning_rate": 2.690495982266556e-06,
+      "loss": 0.3176,
+      "step": 12500
     }
   ],
   "logging_steps": 500,
@@ -210,7 +217,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.461377145765888e+16,
+  "total_flos": 1.522272934821888e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null