MeedoSam committed on
Commit
5e04cfa
1 Parent(s): d2b9496

Uploaded checkpoint-5000

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae99966885dfadca210314bf64872ce443f70308df6e4727adcc50f428ab66db
3
  size 119975656
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19344674fe2ad15f50200034530413438a33bff9ccab8bfa6cf2812aa37bf12e
3
  size 119975656
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2812ec63c28059aad0edb8123a9e90f5f8301e979f2372ce02fe039956e98169
3
  size 60477396
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d94e63a8e69076b7c52dde790e804072eacd8a18380eb10fffd62f19a4cfff1f
3
  size 60477396
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b12fc07e36413d2b0b11012030944d448c215499606c7c88123ca1e537650ca8
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1abd2f2c053411bc4be9ca11b9a9a5f9be07dc02a0721eee3132129b1fc2a3d8
3
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f80b0441e18382140898e5947e4bf00161c8985bfd13094069daa8dad861cc8
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8557a40bc707e1ef7c101859ab04d1c4c6b283598d6d9dc4f6cea13cb82e641e
3
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.2532309861361322,
5
  "eval_steps": 100,
6
- "global_step": 4000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -607,6 +607,156 @@
607
  "eval_samples_per_second": 5.185,
608
  "eval_steps_per_second": 5.185,
609
  "step": 4000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
610
  }
611
  ],
612
  "logging_steps": 100,
@@ -614,7 +764,7 @@
614
  "num_input_tokens_seen": 0,
615
  "num_train_epochs": 2,
616
  "save_steps": 1000,
617
- "total_flos": 6.4408503975936e+16,
618
  "train_batch_size": 1,
619
  "trial_name": null,
620
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.5665387326701654,
5
  "eval_steps": 100,
6
+ "global_step": 5000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
607
  "eval_samples_per_second": 5.185,
608
  "eval_steps_per_second": 5.185,
609
  "step": 4000
610
+ },
611
+ {
612
+ "epoch": 1.28,
613
+ "grad_norm": 0.0578785166144371,
614
+ "learning_rate": 4.004444444444445e-06,
615
+ "loss": 0.0378,
616
+ "step": 4100
617
+ },
618
+ {
619
+ "epoch": 1.28,
620
+ "eval_loss": 0.018333839252591133,
621
+ "eval_runtime": 192.9576,
622
+ "eval_samples_per_second": 5.182,
623
+ "eval_steps_per_second": 5.182,
624
+ "step": 4100
625
+ },
626
+ {
627
+ "epoch": 1.32,
628
+ "grad_norm": 0.0014218598371371627,
629
+ "learning_rate": 3.5600000000000002e-06,
630
+ "loss": 0.0289,
631
+ "step": 4200
632
+ },
633
+ {
634
+ "epoch": 1.32,
635
+ "eval_loss": 0.02419031597673893,
636
+ "eval_runtime": 192.6293,
637
+ "eval_samples_per_second": 5.191,
638
+ "eval_steps_per_second": 5.191,
639
+ "step": 4200
640
+ },
641
+ {
642
+ "epoch": 1.35,
643
+ "grad_norm": 0.0013137555215507746,
644
+ "learning_rate": 3.1155555555555555e-06,
645
+ "loss": 0.0298,
646
+ "step": 4300
647
+ },
648
+ {
649
+ "epoch": 1.35,
650
+ "eval_loss": 0.02638879045844078,
651
+ "eval_runtime": 192.8273,
652
+ "eval_samples_per_second": 5.186,
653
+ "eval_steps_per_second": 5.186,
654
+ "step": 4300
655
+ },
656
+ {
657
+ "epoch": 1.38,
658
+ "grad_norm": 0.35259732604026794,
659
+ "learning_rate": 2.6711111111111116e-06,
660
+ "loss": 0.0382,
661
+ "step": 4400
662
+ },
663
+ {
664
+ "epoch": 1.38,
665
+ "eval_loss": 0.01607164740562439,
666
+ "eval_runtime": 192.7408,
667
+ "eval_samples_per_second": 5.188,
668
+ "eval_steps_per_second": 5.188,
669
+ "step": 4400
670
+ },
671
+ {
672
+ "epoch": 1.41,
673
+ "grad_norm": 0.0020605421159416437,
674
+ "learning_rate": 2.226666666666667e-06,
675
+ "loss": 0.0339,
676
+ "step": 4500
677
+ },
678
+ {
679
+ "epoch": 1.41,
680
+ "eval_loss": 0.014907135628163815,
681
+ "eval_runtime": 192.8289,
682
+ "eval_samples_per_second": 5.186,
683
+ "eval_steps_per_second": 5.186,
684
+ "step": 4500
685
+ },
686
+ {
687
+ "epoch": 1.44,
688
+ "grad_norm": 0.0019016048172488809,
689
+ "learning_rate": 1.7822222222222225e-06,
690
+ "loss": 0.0195,
691
+ "step": 4600
692
+ },
693
+ {
694
+ "epoch": 1.44,
695
+ "eval_loss": 0.015925556421279907,
696
+ "eval_runtime": 192.4215,
697
+ "eval_samples_per_second": 5.197,
698
+ "eval_steps_per_second": 5.197,
699
+ "step": 4600
700
+ },
701
+ {
702
+ "epoch": 1.47,
703
+ "grad_norm": 0.734219491481781,
704
+ "learning_rate": 1.337777777777778e-06,
705
+ "loss": 0.0328,
706
+ "step": 4700
707
+ },
708
+ {
709
+ "epoch": 1.47,
710
+ "eval_loss": 0.014890914782881737,
711
+ "eval_runtime": 192.4767,
712
+ "eval_samples_per_second": 5.195,
713
+ "eval_steps_per_second": 5.195,
714
+ "step": 4700
715
+ },
716
+ {
717
+ "epoch": 1.5,
718
+ "grad_norm": 2.157243251800537,
719
+ "learning_rate": 8.933333333333334e-07,
720
+ "loss": 0.0429,
721
+ "step": 4800
722
+ },
723
+ {
724
+ "epoch": 1.5,
725
+ "eval_loss": 0.01486047450453043,
726
+ "eval_runtime": 191.7644,
727
+ "eval_samples_per_second": 5.215,
728
+ "eval_steps_per_second": 5.215,
729
+ "step": 4800
730
+ },
731
+ {
732
+ "epoch": 1.54,
733
+ "grad_norm": 0.00161929486785084,
734
+ "learning_rate": 4.488888888888889e-07,
735
+ "loss": 0.0312,
736
+ "step": 4900
737
+ },
738
+ {
739
+ "epoch": 1.54,
740
+ "eval_loss": 0.02127786912024021,
741
+ "eval_runtime": 191.9957,
742
+ "eval_samples_per_second": 5.208,
743
+ "eval_steps_per_second": 5.208,
744
+ "step": 4900
745
+ },
746
+ {
747
+ "epoch": 1.57,
748
+ "grad_norm": 0.001571273198351264,
749
+ "learning_rate": 4.444444444444445e-09,
750
+ "loss": 0.0364,
751
+ "step": 5000
752
+ },
753
+ {
754
+ "epoch": 1.57,
755
+ "eval_loss": 0.01901436597108841,
756
+ "eval_runtime": 191.5985,
757
+ "eval_samples_per_second": 5.219,
758
+ "eval_steps_per_second": 5.219,
759
+ "step": 5000
760
  }
761
  ],
762
  "logging_steps": 100,
 
764
  "num_input_tokens_seen": 0,
765
  "num_train_epochs": 2,
766
  "save_steps": 1000,
767
+ "total_flos": 8.051062996992e+16,
768
  "train_batch_size": 1,
769
  "trial_name": null,
770
  "trial_params": null