sujr commited on
Commit
ca2b5ff
1 Parent(s): 44dda9b

Upload folder using huggingface_hub

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0a4d62d92c4b35ef29608ae3d385e2dbbd3bd2e44e1e9c274952fed6b383e48e
3
  size 469105640
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86a85eb22246650e7064af73ff633a6e5db5926d868fcace188a50b2339da322
3
  size 469105640
latest CHANGED
@@ -1 +1 @@
1
- global_step800
 
1
+ global_step1200
rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a343dd93cd21bdc90d289f3ca48ab49de24b9f748799acb23184c62f5d2b505a
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d5c385708fb05661a4a8830505a505fe5e9b78fa137b27d24db7b55c3109e66
3
  size 15920
rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e78f906506770f43e59c54fef023c80264ba4db0c95909db5aa497d4875f1e32
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54a0e9ba9c486d9f2d3a1e1833dde7d1e5e24be602bbe39591f9ce42d6c1d9a2
3
  size 15920
rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1801fc92aac20f4b2cd6c241493cc948c1ce8800b14797fdefee2b1f494d7b9f
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f01bf9dc1bbe7ddeac01f70fb90763087099c832e1eb46ff1d0a18b90cb42f0
3
  size 15920
rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb7617cd6b75e491a348879fed069c07f2a2f52647a39a51812a3039227e011e
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:046d1a25a22d9a76cfa2bcfa7cf0f015d6f3fdda3ed5ca4852edd19999e520a4
3
  size 15920
rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f2aa9e524787be3fd2130cbb1a33ce0d917090fdf18cf026905505c6c1f67c64
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bcd470f6b81981751baca34844a802fa863605a53d6e5c33cf9b95de794f264
3
  size 15920
rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:048ab222ccc631300416028b25a3132d82f849b7a32356b338d26e9eef8ae3fb
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:633063a8621dab82d328021c44ce11580d5ca0bed894eac1db835cd5550054bf
3
  size 15920
rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9670c00e2e4b001bb5f458d57d181a0ae7bf4587cc05947eef4b84e438e4178
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:831f5f2bccebc23f6a90b480779eb8ce9444452ff9525537f298c2fe07f58208
3
  size 15920
rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da5ed04d69158bc88c3dc621620dae175703ddfab9924471e44fc939b4c4386c
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffb457efa4dbdf26174186fe237d62d8862065efcd11448470f26066ab373ab0
3
  size 15920
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:901dc2645bb26444439097220bce3343e3d0a315e276f271fbd122fb8170ad53
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:748e88435c8297b9cdf1b7b8ccd7e64d6ff7fe7e782a39c2866f34b8b9e4e95f
3
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.12325706802249442,
5
  "eval_steps": 500,
6
- "global_step": 800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -567,6 +567,286 @@
567
  "learning_rate": 2.9041681386148966e-05,
568
  "loss": 0.6447,
569
  "step": 800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
570
  }
571
  ],
572
  "logging_steps": 10,
@@ -586,7 +866,7 @@
586
  "attributes": {}
587
  }
588
  },
589
- "total_flos": 7.289521573986304e+18,
590
  "train_batch_size": 4,
591
  "trial_name": null,
592
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.18488560203374163,
5
  "eval_steps": 500,
6
+ "global_step": 1200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
567
  "learning_rate": 2.9041681386148966e-05,
568
  "loss": 0.6447,
569
  "step": 800
570
+ },
571
+ {
572
+ "epoch": 0.1247977813727756,
573
+ "grad_norm": 5.542855195057769,
574
+ "learning_rate": 2.9015718212400918e-05,
575
+ "loss": 0.6486,
576
+ "step": 810
577
+ },
578
+ {
579
+ "epoch": 0.12633849472305678,
580
+ "grad_norm": 3.60243198052599,
581
+ "learning_rate": 2.8989419943632992e-05,
582
+ "loss": 0.6552,
583
+ "step": 820
584
+ },
585
+ {
586
+ "epoch": 0.12787920807333797,
587
+ "grad_norm": 10.54124596601907,
588
+ "learning_rate": 2.896278720859776e-05,
589
+ "loss": 0.667,
590
+ "step": 830
591
+ },
592
+ {
593
+ "epoch": 0.12941992142361913,
594
+ "grad_norm": 8.542824965925352,
595
+ "learning_rate": 2.8935820644044398e-05,
596
+ "loss": 0.697,
597
+ "step": 840
598
+ },
599
+ {
600
+ "epoch": 0.13096063477390033,
601
+ "grad_norm": 6.327341591650607,
602
+ "learning_rate": 2.890852089470343e-05,
603
+ "loss": 0.65,
604
+ "step": 850
605
+ },
606
+ {
607
+ "epoch": 0.1325013481241815,
608
+ "grad_norm": 7.687827002540841,
609
+ "learning_rate": 2.888088861327135e-05,
610
+ "loss": 0.6435,
611
+ "step": 860
612
+ },
613
+ {
614
+ "epoch": 0.13404206147446268,
615
+ "grad_norm": 3.356453126127434,
616
+ "learning_rate": 2.885292446039499e-05,
617
+ "loss": 0.6721,
618
+ "step": 870
619
+ },
620
+ {
621
+ "epoch": 0.13558277482474385,
622
+ "grad_norm": 8.406402473059597,
623
+ "learning_rate": 2.8824629104655736e-05,
624
+ "loss": 0.6694,
625
+ "step": 880
626
+ },
627
+ {
628
+ "epoch": 0.13712348817502504,
629
+ "grad_norm": 11.653019434398818,
630
+ "learning_rate": 2.8796003222553558e-05,
631
+ "loss": 0.6531,
632
+ "step": 890
633
+ },
634
+ {
635
+ "epoch": 0.13866420152530623,
636
+ "grad_norm": 6.371551478518258,
637
+ "learning_rate": 2.8767047498490798e-05,
638
+ "loss": 0.6568,
639
+ "step": 900
640
+ },
641
+ {
642
+ "epoch": 0.1402049148755874,
643
+ "grad_norm": 26.71523757066426,
644
+ "learning_rate": 2.8737762624755846e-05,
645
+ "loss": 0.6857,
646
+ "step": 910
647
+ },
648
+ {
649
+ "epoch": 0.1417456282258686,
650
+ "grad_norm": 4.417578021376778,
651
+ "learning_rate": 2.8708149301506573e-05,
652
+ "loss": 0.665,
653
+ "step": 920
654
+ },
655
+ {
656
+ "epoch": 0.14328634157614975,
657
+ "grad_norm": 5.335327649767265,
658
+ "learning_rate": 2.8678208236753577e-05,
659
+ "loss": 0.7014,
660
+ "step": 930
661
+ },
662
+ {
663
+ "epoch": 0.14482705492643094,
664
+ "grad_norm": 4.155520038033631,
665
+ "learning_rate": 2.8647940146343278e-05,
666
+ "loss": 0.6767,
667
+ "step": 940
668
+ },
669
+ {
670
+ "epoch": 0.1463677682767121,
671
+ "grad_norm": 3.815046866792752,
672
+ "learning_rate": 2.86173457539408e-05,
673
+ "loss": 0.6557,
674
+ "step": 950
675
+ },
676
+ {
677
+ "epoch": 0.1479084816269933,
678
+ "grad_norm": 3.7651811393538552,
679
+ "learning_rate": 2.8586425791012648e-05,
680
+ "loss": 0.675,
681
+ "step": 960
682
+ },
683
+ {
684
+ "epoch": 0.1494491949772745,
685
+ "grad_norm": 4.382966553943605,
686
+ "learning_rate": 2.8555180996809246e-05,
687
+ "loss": 0.6313,
688
+ "step": 970
689
+ },
690
+ {
691
+ "epoch": 0.15098990832755566,
692
+ "grad_norm": 10.103890347717131,
693
+ "learning_rate": 2.8523612118347245e-05,
694
+ "loss": 0.645,
695
+ "step": 980
696
+ },
697
+ {
698
+ "epoch": 0.15253062167783685,
699
+ "grad_norm": 6.294074332584702,
700
+ "learning_rate": 2.8491719910391685e-05,
701
+ "loss": 0.659,
702
+ "step": 990
703
+ },
704
+ {
705
+ "epoch": 0.154071335028118,
706
+ "grad_norm": 2.5808531227457565,
707
+ "learning_rate": 2.845950513543791e-05,
708
+ "loss": 0.6688,
709
+ "step": 1000
710
+ },
711
+ {
712
+ "epoch": 0.1556120483783992,
713
+ "grad_norm": 2.927888770737132,
714
+ "learning_rate": 2.842696856369338e-05,
715
+ "loss": 0.6381,
716
+ "step": 1010
717
+ },
718
+ {
719
+ "epoch": 0.15715276172868037,
720
+ "grad_norm": 14.062433268070832,
721
+ "learning_rate": 2.8394110973059233e-05,
722
+ "loss": 0.6946,
723
+ "step": 1020
724
+ },
725
+ {
726
+ "epoch": 0.15869347507896156,
727
+ "grad_norm": 2.4470437840581054,
728
+ "learning_rate": 2.8360933149111695e-05,
729
+ "loss": 0.6844,
730
+ "step": 1030
731
+ },
732
+ {
733
+ "epoch": 0.16023418842924275,
734
+ "grad_norm": 3.8078577514013343,
735
+ "learning_rate": 2.8327435885083292e-05,
736
+ "loss": 0.64,
737
+ "step": 1040
738
+ },
739
+ {
740
+ "epoch": 0.16177490177952392,
741
+ "grad_norm": 25.680768915471432,
742
+ "learning_rate": 2.8293619981843887e-05,
743
+ "loss": 0.6329,
744
+ "step": 1050
745
+ },
746
+ {
747
+ "epoch": 0.1633156151298051,
748
+ "grad_norm": 5.0369491995422715,
749
+ "learning_rate": 2.8259486247881537e-05,
750
+ "loss": 0.6604,
751
+ "step": 1060
752
+ },
753
+ {
754
+ "epoch": 0.16485632848008627,
755
+ "grad_norm": 3.9026521516961608,
756
+ "learning_rate": 2.8225035499283155e-05,
757
+ "loss": 0.6564,
758
+ "step": 1070
759
+ },
760
+ {
761
+ "epoch": 0.16639704183036746,
762
+ "grad_norm": 4.945417598915296,
763
+ "learning_rate": 2.8190268559715017e-05,
764
+ "loss": 0.6655,
765
+ "step": 1080
766
+ },
767
+ {
768
+ "epoch": 0.16793775518064863,
769
+ "grad_norm": 3.222466850494984,
770
+ "learning_rate": 2.815518626040304e-05,
771
+ "loss": 0.6603,
772
+ "step": 1090
773
+ },
774
+ {
775
+ "epoch": 0.16947846853092982,
776
+ "grad_norm": 6.539136587655263,
777
+ "learning_rate": 2.811978944011293e-05,
778
+ "loss": 0.7036,
779
+ "step": 1100
780
+ },
781
+ {
782
+ "epoch": 0.171019181881211,
783
+ "grad_norm": 8.14211829139052,
784
+ "learning_rate": 2.8084078945130117e-05,
785
+ "loss": 0.6356,
786
+ "step": 1110
787
+ },
788
+ {
789
+ "epoch": 0.17255989523149218,
790
+ "grad_norm": 4.1954200205175605,
791
+ "learning_rate": 2.8048055629239543e-05,
792
+ "loss": 0.6591,
793
+ "step": 1120
794
+ },
795
+ {
796
+ "epoch": 0.17410060858177337,
797
+ "grad_norm": 4.333940585698679,
798
+ "learning_rate": 2.8011720353705224e-05,
799
+ "loss": 0.6575,
800
+ "step": 1130
801
+ },
802
+ {
803
+ "epoch": 0.17564132193205453,
804
+ "grad_norm": 4.2805487065333,
805
+ "learning_rate": 2.797507398724966e-05,
806
+ "loss": 0.6624,
807
+ "step": 1140
808
+ },
809
+ {
810
+ "epoch": 0.17718203528233573,
811
+ "grad_norm": 4.7173213185412,
812
+ "learning_rate": 2.7938117406033086e-05,
813
+ "loss": 0.623,
814
+ "step": 1150
815
+ },
816
+ {
817
+ "epoch": 0.1787227486326169,
818
+ "grad_norm": 99.71383370833006,
819
+ "learning_rate": 2.7900851493632508e-05,
820
+ "loss": 0.6591,
821
+ "step": 1160
822
+ },
823
+ {
824
+ "epoch": 0.18026346198289808,
825
+ "grad_norm": 3.6747989781213954,
826
+ "learning_rate": 2.786327714102058e-05,
827
+ "loss": 0.692,
828
+ "step": 1170
829
+ },
830
+ {
831
+ "epoch": 0.18180417533317927,
832
+ "grad_norm": 2.5009166944220604,
833
+ "learning_rate": 2.78253952465443e-05,
834
+ "loss": 0.6614,
835
+ "step": 1180
836
+ },
837
+ {
838
+ "epoch": 0.18334488868346044,
839
+ "grad_norm": 3.144011687958325,
840
+ "learning_rate": 2.7787206715903543e-05,
841
+ "loss": 0.6406,
842
+ "step": 1190
843
+ },
844
+ {
845
+ "epoch": 0.18488560203374163,
846
+ "grad_norm": 4.063731315051197,
847
+ "learning_rate": 2.7748712462129396e-05,
848
+ "loss": 0.6444,
849
+ "step": 1200
850
  }
851
  ],
852
  "logging_steps": 10,
 
866
  "attributes": {}
867
  }
868
  },
869
+ "total_flos": 1.0934282360979456e+19,
870
  "train_batch_size": 4,
871
  "trial_name": null,
872
  "trial_params": null