leixa committed · verified
Commit f7a423c · 1 Parent(s): 05f7268

Training in progress, step 204, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b7273ba923906b04b87992eae4005ab0c8dc4da4808ad7e8a3b8ab902f05d901
+oid sha256:13acbad963ec40074c567d0d1784741f6961767387ad3467d2cb553791d2e54e
 size 692136856
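
The updated adapter_model.safetensors is an LFS pointer to a ~692 MB weights file, which suggests (though this diff alone does not confirm) a PEFT/LoRA-style adapter checkpoint. As a minimal sketch, assuming the real object has been pulled from LFS and the safetensors library is installed, the tensor names and shapes inside the checkpoint can be listed without loading the weights into memory:

```python
# Inspection sketch (assumes the actual LFS object has been downloaded,
# not just the three-line pointer shown in this diff).
from safetensors import safe_open

path = "last-checkpoint/adapter_model.safetensors"
with safe_open(path, framework="pt") as f:           # "pt" = PyTorch tensors
    for name in f.keys():
        print(name, f.get_slice(name).get_shape())   # shapes only; weights stay on disk
```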
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c4b08f6a6bee912a14eac6fe7863008999f3647480e4a08bce79f58c1b92e19e
+oid sha256:57a427fb02e64b2e9c07be952273516c0846d810883556edaa06d8b17464ff05
 size 85723284
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:34186b94f008aa4de4418533e839ad95cc0707e02f4933eee512f4e8d5d15989
+oid sha256:fb6833ca1600bf131a99c9282df1f52267ed44a81a8c0477f031f940e545f8af
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f21ce5519aba36efeb75a8dad39ab6bd85bd42d0ae24cbc1f5cfa5d96741b8bc
+oid sha256:5f9839d107756d9c8815de9164f2ebf92c05b3536704a349ca5892084df7663e
 size 1064
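
Each of the binary files above is stored through Git LFS, so the diff only rewrites the three-line pointer (spec version, sha256 object id, byte size); the sizes are unchanged and only the oids move. A small sketch, assuming the real objects have been fetched to some local path, for checking a downloaded file against its pointer:

```python
# Hedged sketch: verify a downloaded LFS object against a pointer file from this diff.
# The local paths are illustrative; adjust them to wherever the objects were fetched.
import hashlib

def parse_lfs_pointer(pointer_text: str) -> dict:
    """Parse the 'key value' lines of a Git LFS pointer file."""
    fields = dict(line.split(" ", 1) for line in pointer_text.strip().splitlines())
    algo, digest = fields["oid"].split(":", 1)
    return {"algo": algo, "digest": digest, "size": int(fields["size"])}

def verify(real_file: str, pointer_file: str) -> bool:
    with open(pointer_file) as fh:
        ptr = parse_lfs_pointer(fh.read())
    h = hashlib.new(ptr["algo"])                     # sha256 for the pointers above
    with open(real_file, "rb") as fh:
        for chunk in iter(lambda: fh.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest() == ptr["digest"]

# Example (hypothetical local layout):
# print(verify("downloads/scheduler.pt", "last-checkpoint/scheduler.pt"))
```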
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.011454753722794959,
+  "epoch": 0.013745704467353952,
   "eval_steps": 34,
-  "global_step": 170,
+  "global_step": 204,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -447,6 +447,98 @@
       "eval_samples_per_second": 14.035,
       "eval_steps_per_second": 1.755,
       "step": 170
+    },
+    {
+      "epoch": 0.011522134627046696,
+      "grad_norm": 0.7096220850944519,
+      "learning_rate": 3.17617799075421e-05,
+      "loss": 0.7807,
+      "step": 171
+    },
+    {
+      "epoch": 0.0117242773398019,
+      "grad_norm": 0.7657400369644165,
+      "learning_rate": 3.1178227669141744e-05,
+      "loss": 0.7858,
+      "step": 174
+    },
+    {
+      "epoch": 0.011926420052557105,
+      "grad_norm": 0.8024412393569946,
+      "learning_rate": 3.0591067519763895e-05,
+      "loss": 0.8122,
+      "step": 177
+    },
+    {
+      "epoch": 0.01212856276531231,
+      "grad_norm": 0.6976025700569153,
+      "learning_rate": 3.0000642344401113e-05,
+      "loss": 0.8288,
+      "step": 180
+    },
+    {
+      "epoch": 0.012330705478067515,
+      "grad_norm": 0.6966779828071594,
+      "learning_rate": 2.9407296934729227e-05,
+      "loss": 0.793,
+      "step": 183
+    },
+    {
+      "epoch": 0.012532848190822721,
+      "grad_norm": 0.7219818830490112,
+      "learning_rate": 2.8811377787758636e-05,
+      "loss": 0.7883,
+      "step": 186
+    },
+    {
+      "epoch": 0.012734990903577926,
+      "grad_norm": 0.8189945816993713,
+      "learning_rate": 2.8213232903489865e-05,
+      "loss": 0.885,
+      "step": 189
+    },
+    {
+      "epoch": 0.01293713361633313,
+      "grad_norm": 0.902603805065155,
+      "learning_rate": 2.761321158169134e-05,
+      "loss": 0.8383,
+      "step": 192
+    },
+    {
+      "epoch": 0.013139276329088337,
+      "grad_norm": 0.8128630518913269,
+      "learning_rate": 2.7011664217918154e-05,
+      "loss": 0.852,
+      "step": 195
+    },
+    {
+      "epoch": 0.013341419041843541,
+      "grad_norm": 0.7031587958335876,
+      "learning_rate": 2.6408942098890936e-05,
+      "loss": 0.8622,
+      "step": 198
+    },
+    {
+      "epoch": 0.013543561754598747,
+      "grad_norm": 0.7614731788635254,
+      "learning_rate": 2.580539719735433e-05,
+      "loss": 0.8162,
+      "step": 201
+    },
+    {
+      "epoch": 0.013745704467353952,
+      "grad_norm": 0.6810929179191589,
+      "learning_rate": 2.5201381966534748e-05,
+      "loss": 0.8271,
+      "step": 204
+    },
+    {
+      "epoch": 0.013745704467353952,
+      "eval_loss": 0.8147265315055847,
+      "eval_runtime": 1782.1355,
+      "eval_samples_per_second": 14.025,
+      "eval_steps_per_second": 1.754,
+      "step": 204
     }
   ],
   "logging_steps": 3,
@@ -466,7 +558,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.3906416645373952e+17,
+  "total_flos": 2.8687699974448742e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null