dimasik87 committed
Commit 5f95a3e (verified) · 1 parent: 0f176a7

Training in progress, step 35, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aee7894d4aca123e23ba91f91f7c665f60e2e515ea1b07fc759ba6921c3a4d39
+oid sha256:04ee1664bd29ace74cd834abf6756c1c6ad85e31fbf6b86e8e410e92077635c6
 size 83945296
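The file name adapter_model.safetensors suggests this repo stores a PEFT/LoRA adapter rather than full model weights, though the base model is not named in this commit. A minimal sketch of loading such an adapter from the checkpoint directory, with the base model left as a hypothetical placeholder:

# Sketch only: assumes a PEFT adapter (suggested by adapter_model.safetensors)
# and a causal-LM base; BASE_MODEL is a hypothetical placeholder, not stated here.
from peft import PeftModel
from transformers import AutoModelForCausalLM

BASE_MODEL = "..."  # fill in the actual base model id used for training
base = AutoModelForCausalLM.from_pretrained(BASE_MODEL)
model = PeftModel.from_pretrained(base, "last-checkpoint")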
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:045b4d334a6dbdfb7e3597f9a65f037170ed834e1072d8e6ab5d53b64f7b359a
+oid sha256:b31db628080555eedcc22be53e02e0d4a14ffe0bf6a0862b41c6b5703441cc5a
 size 168149074
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:97d585dc7c13382f351b584df6888cf4cf487689fda9286b06d48d18af07a1be
+oid sha256:866fac83e501a78c2e403a48d4ac843b9f01dc29bc6df7eeed2838bc1ca90679
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:baae720e33260fead254c87141d85e241b839ae924033bfd9652fb777f3f1bf0
+oid sha256:fbf1d1277664600b4e977089813b848af48515edea03c4bdcf1a506540fabd37
 size 1064
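The four binary files above are tracked with Git LFS, so the diffs only touch pointer files: a spec version, the sha256 oid of the stored object, and its size in bytes. A minimal sketch for checking a locally downloaded file against the oid in its pointer, using the new adapter_model.safetensors digest from the diff above (chunked reading is just to keep memory flat for large checkpoints):

import hashlib
from pathlib import Path

def sha256_of_file(path: Path, chunk_size: int = 1 << 20) -> str:
    # Stream the file so large checkpoints never have to fit in memory.
    digest = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

expected = "04ee1664bd29ace74cd834abf6756c1c6ad85e31fbf6b86e8e410e92077635c6"
actual = sha256_of_file(Path("last-checkpoint/adapter_model.safetensors"))
print("match" if actual == expected else f"mismatch: {actual}")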
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.13143483023001096,
+  "epoch": 0.1533406352683461,
   "eval_steps": 5,
-  "global_step": 30,
+  "global_step": 35,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -273,6 +273,49 @@
       "eval_samples_per_second": 9.413,
       "eval_steps_per_second": 4.755,
       "step": 30
+    },
+    {
+      "epoch": 0.13581599123767799,
+      "grad_norm": 0.0801151692867279,
+      "learning_rate": 9.215409042721552e-05,
+      "loss": 0.0121,
+      "step": 31
+    },
+    {
+      "epoch": 0.140197152245345,
+      "grad_norm": 0.050880130380392075,
+      "learning_rate": 8.435655349597689e-05,
+      "loss": 0.0048,
+      "step": 32
+    },
+    {
+      "epoch": 0.14457831325301204,
+      "grad_norm": 0.12199495732784271,
+      "learning_rate": 7.66554636144095e-05,
+      "loss": 0.0158,
+      "step": 33
+    },
+    {
+      "epoch": 0.14895947426067907,
+      "grad_norm": 0.04026995226740837,
+      "learning_rate": 6.909830056250527e-05,
+      "loss": 0.008,
+      "step": 34
+    },
+    {
+      "epoch": 0.1533406352683461,
+      "grad_norm": 0.054878607392311096,
+      "learning_rate": 6.173165676349103e-05,
+      "loss": 0.0049,
+      "step": 35
+    },
+    {
+      "epoch": 0.1533406352683461,
+      "eval_loss": 0.015133237466216087,
+      "eval_runtime": 10.3131,
+      "eval_samples_per_second": 9.406,
+      "eval_steps_per_second": 4.751,
+      "step": 35
     }
   ],
   "logging_steps": 1,
@@ -292,7 +335,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.219477097775104e+16,
+  "total_flos": 2.589389947404288e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null