HiTruong committed on
Commit
ab06306
·
verified ·
1 Parent(s): 67a4f41

Training in progress, epoch 12, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83eed0b67e67cfdbe52d04871103cae3103859d7d9cfe67374ee93f232e266fd
3
  size 30026872
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c38ef6ff821f73509cf96cb6d570b574a556c60551e1a3fd64d3062032c66ef1
3
  size 30026872
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16f3819af9c66b1495d202b7a0cd46980e26aa5720da329551fcde9d995aa2ce
3
  size 60252034
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27210ffee9c882ecee501679082f3633d3297bc1de0bb800dadc3eea8a8cafe8
3
  size 60252034
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6aa7df125f391c87450d9bb2389c1fe3e96c3193557f2f91c667f9ea87f188dc
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12603df9abfa498776dd4507dacec9d6818946d74ccd08dae235342b9b938e6f
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:278dcd64427a0b406f001bf45cadd355b7ba0b85ec02beb82878896f7ad99989
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18a3d3963bcbd050078a2c68d817121cba1d8d2caa3515782b9d593a8cf72958
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 10.997987927565392,
5
  "eval_steps": 500,
6
- "global_step": 2728,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -196,6 +196,20 @@
196
  "learning_rate": 0.0002,
197
  "loss": 0.2066,
198
  "step": 2700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
199
  }
200
  ],
201
  "logging_steps": 100,
@@ -215,7 +229,7 @@
215
  "attributes": {}
216
  }
217
  },
218
- "total_flos": 4.1122779841010074e+17,
219
  "train_batch_size": 4,
220
  "trial_name": null,
221
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 12.0,
5
  "eval_steps": 500,
6
+ "global_step": 2977,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
196
  "learning_rate": 0.0002,
197
  "loss": 0.2066,
198
  "step": 2700
199
+ },
200
+ {
201
+ "epoch": 11.287726358148893,
202
+ "grad_norm": 0.4034820795059204,
203
+ "learning_rate": 0.0002,
204
+ "loss": 0.1857,
205
+ "step": 2800
206
+ },
207
+ {
208
+ "epoch": 11.690140845070422,
209
+ "grad_norm": 0.44169268012046814,
210
+ "learning_rate": 0.0002,
211
+ "loss": 0.1859,
212
+ "step": 2900
213
  }
214
  ],
215
  "logging_steps": 100,
 
229
  "attributes": {}
230
  }
231
  },
232
+ "total_flos": 4.486806508582994e+17,
233
  "train_batch_size": 4,
234
  "trial_name": null,
235
  "trial_params": null