leixa committed on
Commit
7768263
·
verified ·
1 Parent(s): 4611f25

Training in progress, step 170, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a76f914b47be02a1f65f43bf3479c04fbd79e2f88b877c3b16a685626b7ad331
3
  size 692136856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7273ba923906b04b87992eae4005ab0c8dc4da4808ad7e8a3b8ab902f05d901
3
  size 692136856
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f257dd9e0289db79f785ec4853e1f1fd36aff06829c616d2869899bfbaee54d
3
  size 85723284
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4b08f6a6bee912a14eac6fe7863008999f3647480e4a08bce79f58c1b92e19e
3
  size 85723284
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:99174d59b2f745e4fca8944695e995f5b82e2541faeac9696a55b797d641e61d
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34186b94f008aa4de4418533e839ad95cc0707e02f4933eee512f4e8d5d15989
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f676b917baa0a895f62d4ea12d985f19fe259f840a0ba6d41d00cece68314f5
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f21ce5519aba36efeb75a8dad39ab6bd85bd42d0ae24cbc1f5cfa5d96741b8bc
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.009163802978235968,
5
  "eval_steps": 34,
6
- "global_step": 136,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -362,6 +362,91 @@
362
  "eval_samples_per_second": 14.026,
363
  "eval_steps_per_second": 1.754,
364
  "step": 136
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
365
  }
366
  ],
367
  "logging_steps": 3,
@@ -381,7 +466,7 @@
381
  "attributes": {}
382
  }
383
  },
384
- "total_flos": 1.912513331629916e+17,
385
  "train_batch_size": 8,
386
  "trial_name": null,
387
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.011454753722794959,
5
  "eval_steps": 34,
6
+ "global_step": 170,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
362
  "eval_samples_per_second": 14.026,
363
  "eval_steps_per_second": 1.754,
364
  "step": 136
365
+ },
366
+ {
367
+ "epoch": 0.009298564786739438,
368
+ "grad_norm": 0.6918753981590271,
369
+ "learning_rate": 3.784717029321922e-05,
370
+ "loss": 0.8194,
371
+ "step": 138
372
+ },
373
+ {
374
+ "epoch": 0.009500707499494642,
375
+ "grad_norm": 0.7483247518539429,
376
+ "learning_rate": 3.732519254757344e-05,
377
+ "loss": 0.8422,
378
+ "step": 141
379
+ },
380
+ {
381
+ "epoch": 0.009702850212249849,
382
+ "grad_norm": 0.7642280459403992,
383
+ "learning_rate": 3.679601723656205e-05,
384
+ "loss": 0.8222,
385
+ "step": 144
386
+ },
387
+ {
388
+ "epoch": 0.009904992925005053,
389
+ "grad_norm": 0.7145370244979858,
390
+ "learning_rate": 3.625995338366492e-05,
391
+ "loss": 0.8073,
392
+ "step": 147
393
+ },
394
+ {
395
+ "epoch": 0.01010713563776026,
396
+ "grad_norm": 0.732183039188385,
397
+ "learning_rate": 3.5717314035076355e-05,
398
+ "loss": 0.8163,
399
+ "step": 150
400
+ },
401
+ {
402
+ "epoch": 0.010309278350515464,
403
+ "grad_norm": 0.6954637765884399,
404
+ "learning_rate": 3.516841607689501e-05,
405
+ "loss": 0.7573,
406
+ "step": 153
407
+ },
408
+ {
409
+ "epoch": 0.01051142106327067,
410
+ "grad_norm": 0.7373840808868408,
411
+ "learning_rate": 3.461358005007128e-05,
412
+ "loss": 0.7868,
413
+ "step": 156
414
+ },
415
+ {
416
+ "epoch": 0.010713563776025874,
417
+ "grad_norm": 0.7047626376152039,
418
+ "learning_rate": 3.405312996322042e-05,
419
+ "loss": 0.821,
420
+ "step": 159
421
+ },
422
+ {
423
+ "epoch": 0.010915706488781079,
424
+ "grad_norm": 0.7702988982200623,
425
+ "learning_rate": 3.348739310341068e-05,
426
+ "loss": 0.8194,
427
+ "step": 162
428
+ },
429
+ {
430
+ "epoch": 0.011117849201536285,
431
+ "grad_norm": 0.7867685556411743,
432
+ "learning_rate": 3.2916699845036816e-05,
433
+ "loss": 0.7898,
434
+ "step": 165
435
+ },
436
+ {
437
+ "epoch": 0.01131999191429149,
438
+ "grad_norm": 0.7021005153656006,
439
+ "learning_rate": 3.234138345689077e-05,
440
+ "loss": 0.7621,
441
+ "step": 168
442
+ },
443
+ {
444
+ "epoch": 0.011454753722794959,
445
+ "eval_loss": 0.8163909316062927,
446
+ "eval_runtime": 1780.9274,
447
+ "eval_samples_per_second": 14.035,
448
+ "eval_steps_per_second": 1.755,
449
+ "step": 170
450
  }
451
  ],
452
  "logging_steps": 3,
 
466
  "attributes": {}
467
  }
468
  },
469
+ "total_flos": 2.3906416645373952e+17,
470
  "train_batch_size": 8,
471
  "trial_name": null,
472
  "trial_params": null