HiTruong committed on
Commit
ccaef97
·
verified ·
1 Parent(s): f54b6a3

Training in progress, epoch 14, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d129595fae7d6a599b3385944328c2ca5a535960773b57f6793b5d9302d90b4b
3
  size 30026872
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8755cbe7e2398d1959416d6feb911384858a351204adb4f45a8e698f8710fd55
3
  size 30026872
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:15dd934ee573a4e96c2e6c32fd035d243501c3f44f5e51d002ee25e6a40619d0
3
  size 60252034
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f66a5ce1a780e3e1ff27f566dc0f0ffc09cedbb81984f67263cfbe2c21bb457c
3
  size 60252034
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5efb80898d149eb1b696384fcec14a440610378f5ac9545b1364e732b1989466
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:235ab0e10f5c7cdfbbd49cc3926a6ca3af658975caba14cd01311559832e2b6e
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd6d49e54c475078e98e61f59f66d9d4d233cbd00636e275a4c3ce7ac559441f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bbd78e2bc5c115f006119ec58fcb69b8d548f43ab2f01e37b0763759baab3bc
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 12.997987927565392,
5
  "eval_steps": 500,
6
- "global_step": 3225,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -231,6 +231,20 @@
231
  "learning_rate": 0.0002,
232
  "loss": 0.1706,
233
  "step": 3200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
  }
235
  ],
236
  "logging_steps": 100,
@@ -250,7 +264,7 @@
250
  "attributes": {}
251
  }
252
  },
253
- "total_flos": 4.8613350330649805e+17,
254
  "train_batch_size": 4,
255
  "trial_name": null,
256
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 14.0,
5
  "eval_steps": 500,
6
+ "global_step": 3474,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
231
  "learning_rate": 0.0002,
232
  "loss": 0.1706,
233
  "step": 3200
234
+ },
235
+ {
236
+ "epoch": 13.299798792756539,
237
+ "grad_norm": 0.42144036293029785,
238
+ "learning_rate": 0.0002,
239
+ "loss": 0.1563,
240
+ "step": 3300
241
+ },
242
+ {
243
+ "epoch": 13.702213279678068,
244
+ "grad_norm": 0.40393030643463135,
245
+ "learning_rate": 0.0002,
246
+ "loss": 0.1555,
247
+ "step": 3400
248
  }
249
  ],
250
  "logging_steps": 100,
 
264
  "attributes": {}
265
  }
266
  },
267
+ "total_flos": 5.235863557546967e+17,
268
  "train_batch_size": 4,
269
  "trial_name": null,
270
  "trial_params": null