Krish356 committed on
Commit
51e9264
·
verified ·
1 Parent(s): 52e305a

Training in progress, step 150, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78dac7535dac9a926aa3d6f1cede2677776554a31ae3b03733c9be34bb45feb8
3
  size 3380768360
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a6d8f0f796537366c6e78ca004befff9f9c27672a628bae1e611c8bc0f94c8c
3
  size 3380768360
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ddc11eef2343db6f5ad7d3c768a98ae313ab5bdd1d33c62a12390009d4c92aa
3
  size 1757899449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffd75d21ac4ac0a8645a72715f91e4d5f09c05dd5a2548ed04a8b49d623fc3a5
3
  size 1757899449
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:97490d166ca8bc27bfa10807632f9ecb473b145cce74c93d287cde23f8af51fb
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5dc9af15ae765cffc21eeb6ddbc68a2629e47a5fc5164b3c35695e55c025ec4
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:69b592d4efa2ff6d0584dfc8cf30049181a8d5c8977939386b39d5882c0a494e
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:213d03f95061a3291403e8d5572036299f2f6f739be51135e2941aff4f3ccff7
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.2508710801393728,
6
  "eval_steps": 30,
7
- "global_step": 120,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -215,6 +215,56 @@
215
  "eval_samples_per_second": 0.291,
216
  "eval_steps_per_second": 0.073,
217
  "step": 120
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
  }
219
  ],
220
  "logging_steps": 5,
@@ -234,7 +284,7 @@
234
  "attributes": {}
235
  }
236
  },
237
- "total_flos": 7.763514566754386e+17,
238
  "train_batch_size": 8,
239
  "trial_name": null,
240
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.5644599303135889,
6
  "eval_steps": 30,
7
+ "global_step": 150,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
215
  "eval_samples_per_second": 0.291,
216
  "eval_steps_per_second": 0.073,
217
  "step": 120
218
+ },
219
+ {
220
+ "epoch": 1.3031358885017421,
221
+ "grad_norm": 0.07795720547437668,
222
+ "learning_rate": 2.950898376017064e-05,
223
+ "loss": 0.1842,
224
+ "step": 125
225
+ },
226
+ {
227
+ "epoch": 1.3554006968641115,
228
+ "grad_norm": 0.07542526721954346,
229
+ "learning_rate": 2.573490187344596e-05,
230
+ "loss": 0.2031,
231
+ "step": 130
232
+ },
233
+ {
234
+ "epoch": 1.4076655052264808,
235
+ "grad_norm": 0.10047340393066406,
236
+ "learning_rate": 2.2133776843878186e-05,
237
+ "loss": 0.24,
238
+ "step": 135
239
+ },
240
+ {
241
+ "epoch": 1.4599303135888502,
242
+ "grad_norm": 0.13595731556415558,
243
+ "learning_rate": 1.873127678391816e-05,
244
+ "loss": 0.2808,
245
+ "step": 140
246
+ },
247
+ {
248
+ "epoch": 1.5121951219512195,
249
+ "grad_norm": 0.06210995092988014,
250
+ "learning_rate": 1.555165404621567e-05,
251
+ "loss": 0.235,
252
+ "step": 145
253
+ },
254
+ {
255
+ "epoch": 1.5644599303135889,
256
+ "grad_norm": 0.08401988446712494,
257
+ "learning_rate": 1.2617572357609564e-05,
258
+ "loss": 0.1849,
259
+ "step": 150
260
+ },
261
+ {
262
+ "epoch": 1.5644599303135889,
263
+ "eval_loss": 0.23435795307159424,
264
+ "eval_runtime": 1753.006,
265
+ "eval_samples_per_second": 0.291,
266
+ "eval_steps_per_second": 0.073,
267
+ "step": 150
268
  }
269
  ],
270
  "logging_steps": 5,
 
284
  "attributes": {}
285
  }
286
  },
287
+ "total_flos": 9.840854969157304e+17,
288
  "train_batch_size": 8,
289
  "trial_name": null,
290
  "trial_params": null