farmery committed on
Commit
2881d0a
·
verified ·
1 Parent(s): 2a267a2

Training in progress, step 86, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:75f1d892b2366386761f0a72e1416c3a2e5e8a2c42705893fa8dc14c92a4967e
3
  size 50624
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:030eaddc6f6e92696098f138d1d6f183ae1feab61caf59dad9121a005f8a739a
3
  size 50624
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7fce9ad8826004310c683743744f592950aa99030e99c0a773d97fb67c7cc572
3
  size 118090
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73e75b1442d6f68ae9ec14ea3d9bb12f9dedda9bc5c41de5f976e4bb49cd2095
3
  size 118090
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0a124254e9255126713591c1f59a9c70d9b2a575d52070bed3b63df36570aa65
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66b4fed55d7442ec4f04242a594d7c198315a7d44290c90ffaea764d86aad661
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4cf20479de077e1f12a1a0a2558891d641a7cd7f83617558364eb5fab3cecf50
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb08bb8a586d27643dcf86a9f8306def51a9136f80f5802d3aee00dc499c85b7
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d32bbee8c1a3203e5781cafb76382ca878bef03f828d2544598d5b9e442e046
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41209214eef4e8962e9e3297d4c43c9981f0d838e4478bdc95bb660b06addb87
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44022d716771debca474b8d0807aaa6d48fb5a1f2aa65fce603e5767b76f790b
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c58fa9498e2c74f0eb38ae93ff5adde3fe07458cf51af1baaaa3f0aa96d3fb9
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92c40e70c89071663742fb7f45e3d35ba3e327e558255bec76dafa8c4bde0527
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3515ffb61bbd99b16f0cf41af74761fa2ee8d9e372c8ce4c68c7f0ba42572ed2
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.6315789473684212,
5
  "eval_steps": 8,
6
- "global_step": 75,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -612,6 +612,91 @@
612
  "learning_rate": 5.080450905401057e-06,
613
  "loss": 10.364,
614
  "step": 75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
615
  }
616
  ],
617
  "logging_steps": 1,
@@ -626,12 +711,12 @@
626
  "should_evaluate": false,
627
  "should_log": false,
628
  "should_save": true,
629
- "should_training_stop": false
630
  },
631
  "attributes": {}
632
  }
633
  },
634
- "total_flos": 15690733977600.0,
635
  "train_batch_size": 8,
636
  "trial_name": null,
637
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.017543859649123,
5
  "eval_steps": 8,
6
+ "global_step": 86,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
612
  "learning_rate": 5.080450905401057e-06,
613
  "loss": 10.364,
614
  "step": 75
615
+ },
616
+ {
617
+ "epoch": 2.6666666666666665,
618
+ "grad_norm": 0.04788883775472641,
619
+ "learning_rate": 4.2113336672471245e-06,
620
+ "loss": 10.3642,
621
+ "step": 76
622
+ },
623
+ {
624
+ "epoch": 2.7017543859649122,
625
+ "grad_norm": 0.05305058881640434,
626
+ "learning_rate": 3.420445597436056e-06,
627
+ "loss": 10.3684,
628
+ "step": 77
629
+ },
630
+ {
631
+ "epoch": 2.736842105263158,
632
+ "grad_norm": 0.04230741783976555,
633
+ "learning_rate": 2.7091379149682685e-06,
634
+ "loss": 10.3681,
635
+ "step": 78
636
+ },
637
+ {
638
+ "epoch": 2.7719298245614032,
639
+ "grad_norm": 0.04892972111701965,
640
+ "learning_rate": 2.0786258770873647e-06,
641
+ "loss": 10.3648,
642
+ "step": 79
643
+ },
644
+ {
645
+ "epoch": 2.807017543859649,
646
+ "grad_norm": 0.05191851034760475,
647
+ "learning_rate": 1.5299867030334814e-06,
648
+ "loss": 10.3678,
649
+ "step": 80
650
+ },
651
+ {
652
+ "epoch": 2.807017543859649,
653
+ "eval_loss": 10.36169147491455,
654
+ "eval_runtime": 0.0484,
655
+ "eval_samples_per_second": 1983.67,
656
+ "eval_steps_per_second": 61.99,
657
+ "step": 80
658
+ },
659
+ {
660
+ "epoch": 2.8421052631578947,
661
+ "grad_norm": 0.05835256725549698,
662
+ "learning_rate": 1.064157733632276e-06,
663
+ "loss": 10.3622,
664
+ "step": 81
665
+ },
666
+ {
667
+ "epoch": 2.8771929824561404,
668
+ "grad_norm": 0.051547639071941376,
669
+ "learning_rate": 6.819348298638839e-07,
670
+ "loss": 10.3642,
671
+ "step": 82
672
+ },
673
+ {
674
+ "epoch": 2.912280701754386,
675
+ "grad_norm": 0.05024786293506622,
676
+ "learning_rate": 3.839710131477492e-07,
677
+ "loss": 10.3686,
678
+ "step": 83
679
+ },
680
+ {
681
+ "epoch": 2.9473684210526314,
682
+ "grad_norm": 0.05746513977646828,
683
+ "learning_rate": 1.7077534966650766e-07,
684
+ "loss": 10.3635,
685
+ "step": 84
686
+ },
687
+ {
688
+ "epoch": 2.982456140350877,
689
+ "grad_norm": 0.05468269810080528,
690
+ "learning_rate": 4.2712080634949024e-08,
691
+ "loss": 10.3656,
692
+ "step": 85
693
+ },
694
+ {
695
+ "epoch": 3.017543859649123,
696
+ "grad_norm": 0.0812983587384224,
697
+ "learning_rate": 0.0,
698
+ "loss": 15.6424,
699
+ "step": 86
700
  }
701
  ],
702
  "logging_steps": 1,
 
711
  "should_evaluate": false,
712
  "should_log": false,
713
  "should_save": true,
714
+ "should_training_stop": true
715
  },
716
  "attributes": {}
717
  }
718
  },
719
+ "total_flos": 17992041627648.0,
720
  "train_batch_size": 8,
721
  "trial_name": null,
722
  "trial_params": null