AlekseyKorshuk committed
Commit 6d2c1bd
Parent: 4a39723

huggingartists

README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
 dataset = load_dataset("huggingartists/eminem")
 ```
 
- [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/1b0rnnbf/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
+ [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/1u30jxqs/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 
 ## Training procedure
 
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Eminem's lyrics.
 
- Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/qx8rcy66) for full transparency and reproducibility.
+ Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/38khmsgm) for full transparency and reproducibility.
 
- At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/qx8rcy66/artifacts) is logged and versioned.
+ At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/38khmsgm/artifacts) is logged and versioned.
 
 ## How to use
 
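The updated card keeps the same usage pattern: load the lyrics dataset with `datasets` and generate with the fine-tuned checkpoint. Below is a minimal sketch, not part of the commit, assuming the model is published on the Hub as `huggingartists/eminem` and that the dataset exposes a single `train` split.

```python
# Minimal sketch (not part of the commit): load the lyrics dataset referenced
# in the card and sample from the fine-tuned model.
# Assumes the checkpoint is published on the Hub as "huggingartists/eminem"
# and that the dataset has a single "train" split.
from datasets import load_dataset
from transformers import pipeline

dataset = load_dataset("huggingartists/eminem")
print(dataset["train"][0])  # inspect one raw lyrics record

generator = pipeline("text-generation", model="huggingartists/eminem")
sample = generator("I am", max_length=60, num_return_sequences=1)
print(sample[0]["generated_text"])
```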
config.json CHANGED
@@ -37,7 +37,7 @@
   }
  },
 "torch_dtype": "float32",
- "transformers_version": "4.17.0",
+ "transformers_version": "4.18.0",
 "use_cache": true,
 "vocab_size": 50257
 }
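The only change in `config.json` is the `transformers_version` stamp (4.17.0 → 4.18.0), which records the library version that saved the checkpoint rather than anything that affects inference. A quick sketch for inspecting it, assuming the config is loadable from the Hub repo `huggingartists/eminem` (a local path to the repo should work the same way):

```python
# Sketch: read the version stamp that this commit bumps from 4.17.0 to 4.18.0.
from transformers import AutoConfig

config = AutoConfig.from_pretrained("huggingartists/eminem")
print(config.transformers_version)  # "4.18.0" after this commit
print(config.vocab_size)            # 50257, the unchanged GPT-2 vocabulary
```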
evaluation.txt CHANGED
@@ -1 +1 @@
- {"eval_loss": 0.2455403357744217, "eval_runtime": 13.8298, "eval_samples_per_second": 43.674, "eval_steps_per_second": 5.495, "epoch": 2.0}
+ {"eval_loss": 0.21034620702266693, "eval_runtime": 15.1752, "eval_samples_per_second": 41.779, "eval_steps_per_second": 5.272, "epoch": 6.0}
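If `eval_loss` is the usual per-token cross-entropy reported by the Trainer, the drop from ~0.246 to ~0.210 corresponds to a perplexity of roughly exp(0.21) ≈ 1.23 on the eval split. A small sketch that reads the file as committed here:

```python
# Sketch: read evaluation.txt (a single JSON object, as in this repo) and,
# assuming eval_loss is the usual per-token cross-entropy, report perplexity.
import json
import math

with open("evaluation.txt") as f:
    metrics = json.load(f)

print(metrics["eval_loss"])            # 0.2103... after this commit
print(math.exp(metrics["eval_loss"]))  # ~1.23 perplexity on the eval split
```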
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:91f6459b49fddf722d9f858151797155a47bc8c1ea85a7b38322bbab947c2cca
+ oid sha256:23feba72d0f43086e6c74b84adee8455b0785e580e0aab25abcd70b41dbf0158
 size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:5ee96f8a6e48147487cfb22ba654c29712017fab3f311ef29e0b92b111b5dcc9
+ oid sha256:0307db0b2ba20c296872de4c51775c6f6f7c145d85ba4eba9883ae1714259f3e
 size 995604017
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:47644ff40a294931cd31f96b23b17629d42b2e26ea5369f56afd9709f6fe557e
- size 510404393
+ oid sha256:8eff03ee26d2091ee260f5505223bb3ad8b0787140cfe1662d5bebb6562fe0de
+ size 510396521
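The weight files are stored as Git LFS pointers, so the diff only shows the new `oid sha256` and `size`. If you fetch the binary directly, you can check it against the pointer; a sketch, with the expected digest and size copied from the hunk above:

```python
# Sketch: verify a downloaded pytorch_model.bin against the LFS pointer
# committed here (expected digest and size copied from the hunk above).
import hashlib
import os

EXPECTED_SHA256 = "8eff03ee26d2091ee260f5505223bb3ad8b0787140cfe1662d5bebb6562fe0de"
EXPECTED_SIZE = 510396521  # bytes

path = "pytorch_model.bin"
digest = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        digest.update(chunk)

assert os.path.getsize(path) == EXPECTED_SIZE, "size does not match the pointer"
assert digest.hexdigest() == EXPECTED_SHA256, "sha256 does not match the pointer"
print("pytorch_model.bin matches the LFS pointer")
```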
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:8620a678022e3508118c53e7b72967ce161e332ca8a4bec3c57043c8d0ab815e
+ oid sha256:64318524f70467d7f035d4c29d2caf9c7a12a5d003c3d37e5cb258fa8e627862
 size 14567
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:f05fbfb2124177f9afcf7ea2075c976a78e42036cfed8db1b7e37fe10af01dde
+ oid sha256:8f9cf36b252c26cdc75b14e37b5b4fccbc15ca4a3843e9a29281707a78831c49
 size 623
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
 {
- "best_metric": 0.2455403357744217,
- "best_model_checkpoint": "output/eminem/checkpoint-460",
- "epoch": 1.0,
- "global_step": 460,
+ "best_metric": 0.21034620702266693,
+ "best_model_checkpoint": "output/eminem/checkpoint-912",
+ "epoch": 2.0,
+ "global_step": 912,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
@@ -566,11 +566,559 @@
 "eval_samples_per_second": 43.661,
 "eval_steps_per_second": 5.494,
 "step": 460
+ },
570
+ {
571
+ "epoch": 1.02,
572
+ "learning_rate": 0.0001310364026633813,
573
+ "loss": 0.4239,
574
+ "step": 465
575
+ },
576
+ {
577
+ "epoch": 1.03,
578
+ "learning_rate": 0.0001319781359302741,
579
+ "loss": 0.5115,
580
+ "step": 470
581
+ },
582
+ {
583
+ "epoch": 1.04,
584
+ "learning_rate": 0.00013284467114611735,
585
+ "loss": 0.4756,
586
+ "step": 475
587
+ },
588
+ {
589
+ "epoch": 1.05,
590
+ "learning_rate": 0.00013363498016834413,
591
+ "loss": 0.4892,
592
+ "step": 480
593
+ },
594
+ {
595
+ "epoch": 1.06,
596
+ "learning_rate": 0.00013434812529663595,
597
+ "loss": 0.5002,
598
+ "step": 485
599
+ },
600
+ {
601
+ "epoch": 1.07,
602
+ "learning_rate": 0.00013498326038550478,
603
+ "loss": 0.5714,
604
+ "step": 490
605
+ },
606
+ {
607
+ "epoch": 1.09,
608
+ "learning_rate": 0.00013553963184824178,
609
+ "loss": 0.5845,
610
+ "step": 495
611
+ },
612
+ {
613
+ "epoch": 1.1,
614
+ "learning_rate": 0.0001360165795510487,
615
+ "loss": 0.4911,
616
+ "step": 500
617
+ },
618
+ {
619
+ "epoch": 1.11,
620
+ "learning_rate": 0.00013641353759628265,
621
+ "loss": 0.5275,
622
+ "step": 505
623
+ },
624
+ {
625
+ "epoch": 1.12,
626
+ "learning_rate": 0.00013673003499389456,
627
+ "loss": 0.5292,
628
+ "step": 510
629
+ },
630
+ {
631
+ "epoch": 1.13,
632
+ "learning_rate": 0.00013696569622025749,
633
+ "loss": 0.5506,
634
+ "step": 515
635
+ },
636
+ {
637
+ "epoch": 1.14,
638
+ "learning_rate": 0.00013712024166372548,
639
+ "loss": 0.5552,
640
+ "step": 520
641
+ },
642
+ {
643
+ "epoch": 1.15,
644
+ "learning_rate": 0.0001371934879563911,
645
+ "loss": 0.5404,
646
+ "step": 525
647
+ },
648
+ {
649
+ "epoch": 1.16,
650
+ "learning_rate": 0.0001371853481916521,
651
+ "loss": 0.5364,
652
+ "step": 530
653
+ },
654
+ {
655
+ "epoch": 1.17,
656
+ "learning_rate": 0.0001370958320273255,
657
+ "loss": 0.4954,
658
+ "step": 535
659
+ },
660
+ {
661
+ "epoch": 1.18,
662
+ "learning_rate": 0.00013692504567418905,
663
+ "loss": 0.6414,
664
+ "step": 540
665
+ },
666
+ {
667
+ "epoch": 1.2,
668
+ "learning_rate": 0.0001366731917699612,
669
+ "loss": 0.5806,
670
+ "step": 545
671
+ },
672
+ {
673
+ "epoch": 1.21,
674
+ "learning_rate": 0.00013634056913887406,
675
+ "loss": 0.5283,
676
+ "step": 550
677
+ },
678
+ {
679
+ "epoch": 1.22,
680
+ "learning_rate": 0.0001359275724371159,
681
+ "loss": 0.5036,
682
+ "step": 555
683
+ },
684
+ {
685
+ "epoch": 1.23,
686
+ "learning_rate": 0.00013543469168457526,
687
+ "loss": 0.583,
688
+ "step": 560
689
+ },
690
+ {
691
+ "epoch": 1.24,
692
+ "learning_rate": 0.00013486251168343032,
693
+ "loss": 0.5384,
694
+ "step": 565
695
+ },
696
+ {
697
+ "epoch": 1.25,
698
+ "learning_rate": 0.0001342117113242847,
699
+ "loss": 0.5136,
700
+ "step": 570
701
+ },
702
+ {
703
+ "epoch": 1.26,
704
+ "learning_rate": 0.00013348306278066345,
705
+ "loss": 0.5162,
706
+ "step": 575
707
+ },
708
+ {
709
+ "epoch": 1.27,
710
+ "learning_rate": 0.0001326774305928314,
711
+ "loss": 0.4983,
712
+ "step": 580
713
+ },
714
+ {
715
+ "epoch": 1.28,
716
+ "learning_rate": 0.00013179577064201505,
717
+ "loss": 0.5491,
718
+ "step": 585
719
+ },
720
+ {
721
+ "epoch": 1.29,
722
+ "learning_rate": 0.00013083912901625498,
723
+ "loss": 0.4611,
724
+ "step": 590
725
+ },
726
+ {
727
+ "epoch": 1.3,
728
+ "learning_rate": 0.0001298086407692224,
729
+ "loss": 0.5359,
730
+ "step": 595
731
+ },
732
+ {
733
+ "epoch": 1.32,
734
+ "learning_rate": 0.00012870552857347723,
735
+ "loss": 0.5226,
736
+ "step": 600
737
+ },
738
+ {
739
+ "epoch": 1.33,
740
+ "learning_rate": 0.00012753110126977415,
741
+ "loss": 0.5316,
742
+ "step": 605
743
+ },
744
+ {
745
+ "epoch": 1.34,
746
+ "learning_rate": 0.00012628675231412197,
747
+ "loss": 0.5227,
748
+ "step": 610
749
+ },
750
+ {
751
+ "epoch": 1.35,
752
+ "learning_rate": 0.0001249739581244523,
753
+ "loss": 0.4961,
754
+ "step": 615
755
+ },
756
+ {
757
+ "epoch": 1.36,
758
+ "learning_rate": 0.00012359427632884786,
759
+ "loss": 0.5487,
760
+ "step": 620
761
+ },
762
+ {
763
+ "epoch": 1.37,
764
+ "learning_rate": 0.0001221493439174271,
765
+ "loss": 0.5994,
766
+ "step": 625
767
+ },
768
+ {
769
+ "epoch": 1.38,
770
+ "learning_rate": 0.00012064087530004776,
771
+ "loss": 0.5401,
772
+ "step": 630
773
+ },
774
+ {
775
+ "epoch": 1.39,
776
+ "learning_rate": 0.00011907066027217711,
777
+ "loss": 0.5615,
778
+ "step": 635
779
+ },
780
+ {
781
+ "epoch": 1.4,
782
+ "learning_rate": 0.00011744056189128525,
783
+ "loss": 0.5705,
784
+ "step": 640
785
+ },
786
+ {
787
+ "epoch": 1.41,
788
+ "learning_rate": 0.00011575251426634572,
789
+ "loss": 0.5663,
790
+ "step": 645
791
+ },
792
+ {
793
+ "epoch": 1.43,
794
+ "learning_rate": 0.00011400852026300672,
795
+ "loss": 0.5623,
796
+ "step": 650
797
+ },
798
+ {
799
+ "epoch": 1.44,
800
+ "learning_rate": 0.00011221064912720382,
801
+ "loss": 0.5898,
802
+ "step": 655
803
+ },
804
+ {
805
+ "epoch": 1.45,
806
+ "learning_rate": 0.00011036103402999931,
807
+ "loss": 0.5708,
808
+ "step": 660
809
+ },
810
+ {
811
+ "epoch": 1.46,
812
+ "learning_rate": 0.00010846186953657361,
813
+ "loss": 0.5299,
814
+ "step": 665
815
+ },
816
+ {
817
+ "epoch": 1.47,
818
+ "learning_rate": 0.00010651540900238634,
819
+ "loss": 0.6369,
820
+ "step": 670
821
+ },
822
+ {
823
+ "epoch": 1.48,
824
+ "learning_rate": 0.00010452396189956808,
825
+ "loss": 0.532,
826
+ "step": 675
827
+ },
828
+ {
829
+ "epoch": 1.49,
830
+ "learning_rate": 0.0001024898910767386,
831
+ "loss": 0.5957,
832
+ "step": 680
833
+ },
834
+ {
835
+ "epoch": 1.5,
836
+ "learning_rate": 0.00010041560995548433,
837
+ "loss": 0.4907,
838
+ "step": 685
839
+ },
840
+ {
841
+ "epoch": 1.51,
842
+ "learning_rate": 9.830357966684779e-05,
843
+ "loss": 0.5726,
844
+ "step": 690
845
+ },
846
+ {
847
+ "epoch": 1.52,
848
+ "learning_rate": 9.615630613119297e-05,
849
+ "loss": 0.5582,
850
+ "step": 695
851
+ },
852
+ {
853
+ "epoch": 1.54,
854
+ "learning_rate": 9.397633708493872e-05,
855
+ "loss": 0.6085,
856
+ "step": 700
857
+ },
858
+ {
859
+ "epoch": 1.55,
860
+ "learning_rate": 9.17662590576683e-05,
861
+ "loss": 0.6075,
862
+ "step": 705
863
+ },
864
+ {
865
+ "epoch": 1.56,
866
+ "learning_rate": 8.95286943032199e-05,
867
+ "loss": 0.5199,
868
+ "step": 710
869
+ },
870
+ {
871
+ "epoch": 1.57,
872
+ "learning_rate": 8.726629768839068e-05,
873
+ "loss": 0.565,
874
+ "step": 715
875
+ },
876
+ {
877
+ "epoch": 1.58,
878
+ "learning_rate": 8.49817535429363e-05,
879
+ "loss": 0.5779,
880
+ "step": 720
881
+ },
882
+ {
883
+ "epoch": 1.59,
884
+ "learning_rate": 8.267777247462197e-05,
885
+ "loss": 0.514,
886
+ "step": 725
887
+ },
888
+ {
889
+ "epoch": 1.6,
890
+ "learning_rate": 8.035708815308325e-05,
891
+ "loss": 0.6287,
892
+ "step": 730
893
+ },
894
+ {
895
+ "epoch": 1.61,
896
+ "learning_rate": 7.802245406634311e-05,
897
+ "loss": 0.5617,
898
+ "step": 735
899
+ },
900
+ {
901
+ "epoch": 1.62,
902
+ "learning_rate": 7.56766402537954e-05,
903
+ "loss": 0.6135,
904
+ "step": 740
905
+ },
906
+ {
907
+ "epoch": 1.63,
908
+ "learning_rate": 7.332243001956172e-05,
909
+ "loss": 0.5151,
910
+ "step": 745
911
+ },
912
+ {
913
+ "epoch": 1.64,
914
+ "learning_rate": 7.096261663009792e-05,
915
+ "loss": 0.5282,
916
+ "step": 750
917
+ },
918
+ {
919
+ "epoch": 1.66,
920
+ "learning_rate": 6.85999999999999e-05,
921
+ "loss": 0.5488,
922
+ "step": 755
923
+ },
924
+ {
925
+ "epoch": 1.67,
926
+ "learning_rate": 6.623738336990383e-05,
927
+ "loss": 0.5678,
928
+ "step": 760
929
+ },
930
+ {
931
+ "epoch": 1.68,
932
+ "learning_rate": 6.387756998044003e-05,
933
+ "loss": 0.5534,
934
+ "step": 765
935
+ },
936
+ {
937
+ "epoch": 1.69,
938
+ "learning_rate": 6.152335974620634e-05,
939
+ "loss": 0.6249,
940
+ "step": 770
941
+ },
942
+ {
943
+ "epoch": 1.7,
944
+ "learning_rate": 5.917754593365669e-05,
945
+ "loss": 0.5588,
946
+ "step": 775
947
+ },
948
+ {
949
+ "epoch": 1.71,
950
+ "learning_rate": 5.684291184691846e-05,
951
+ "loss": 0.4985,
952
+ "step": 780
953
+ },
954
+ {
955
+ "epoch": 1.72,
956
+ "learning_rate": 5.452222752537974e-05,
957
+ "loss": 0.545,
958
+ "step": 785
959
+ },
960
+ {
961
+ "epoch": 1.73,
962
+ "learning_rate": 5.2218246457065396e-05,
963
+ "loss": 0.5905,
964
+ "step": 790
965
+ },
966
+ {
967
+ "epoch": 1.74,
968
+ "learning_rate": 4.993370231160913e-05,
969
+ "loss": 0.4741,
970
+ "step": 795
971
+ },
972
+ {
973
+ "epoch": 1.75,
974
+ "learning_rate": 4.767130569678176e-05,
975
+ "loss": 0.5473,
976
+ "step": 800
977
+ },
978
+ {
979
+ "epoch": 1.77,
980
+ "learning_rate": 4.543374094233335e-05,
981
+ "loss": 0.525,
982
+ "step": 805
983
+ },
984
+ {
985
+ "epoch": 1.78,
986
+ "learning_rate": 4.3223662915062914e-05,
987
+ "loss": 0.5134,
988
+ "step": 810
989
+ },
990
+ {
991
+ "epoch": 1.79,
992
+ "learning_rate": 4.104369386880686e-05,
993
+ "loss": 0.5726,
994
+ "step": 815
995
+ },
996
+ {
997
+ "epoch": 1.8,
998
+ "learning_rate": 3.8896420333152035e-05,
999
+ "loss": 0.4985,
1000
+ "step": 820
1001
+ },
1002
+ {
1003
+ "epoch": 1.81,
1004
+ "learning_rate": 3.6784390044517205e-05,
1005
+ "loss": 0.5251,
1006
+ "step": 825
1007
+ },
1008
+ {
1009
+ "epoch": 1.82,
1010
+ "learning_rate": 3.471010892326291e-05,
1011
+ "loss": 0.6068,
1012
+ "step": 830
1013
+ },
1014
+ {
1015
+ "epoch": 1.83,
1016
+ "learning_rate": 3.267603810043175e-05,
1017
+ "loss": 0.5227,
1018
+ "step": 835
1019
+ },
1020
+ {
1021
+ "epoch": 1.84,
1022
+ "learning_rate": 3.068459099761349e-05,
1023
+ "loss": 0.5061,
1024
+ "step": 840
1025
+ },
1026
+ {
1027
+ "epoch": 1.85,
1028
+ "learning_rate": 2.8738130463427823e-05,
1029
+ "loss": 0.5539,
1030
+ "step": 845
1031
+ },
1032
+ {
1033
+ "epoch": 1.86,
1034
+ "learning_rate": 2.683896597000207e-05,
1035
+ "loss": 0.5975,
1036
+ "step": 850
1037
+ },
1038
+ {
1039
+ "epoch": 1.88,
1040
+ "learning_rate": 2.4989350872796035e-05,
1041
+ "loss": 0.5843,
1042
+ "step": 855
1043
+ },
1044
+ {
1045
+ "epoch": 1.89,
1046
+ "learning_rate": 2.3191479736993138e-05,
1047
+ "loss": 0.571,
1048
+ "step": 860
1049
+ },
1050
+ {
1051
+ "epoch": 1.9,
1052
+ "learning_rate": 2.1447485733655544e-05,
1053
+ "loss": 0.5282,
1054
+ "step": 865
1055
+ },
1056
+ {
1057
+ "epoch": 1.91,
1058
+ "learning_rate": 1.975943810871599e-05,
1059
+ "loss": 0.5626,
1060
+ "step": 870
1061
+ },
1062
+ {
1063
+ "epoch": 1.92,
1064
+ "learning_rate": 1.8129339727824064e-05,
1065
+ "loss": 0.5394,
1066
+ "step": 875
1067
+ },
1068
+ {
1069
+ "epoch": 1.93,
1070
+ "learning_rate": 1.6559124699952118e-05,
1071
+ "loss": 0.4747,
1072
+ "step": 880
1073
+ },
1074
+ {
1075
+ "epoch": 1.94,
1076
+ "learning_rate": 1.5050656082574002e-05,
1077
+ "loss": 0.5601,
1078
+ "step": 885
1079
+ },
1080
+ {
1081
+ "epoch": 1.95,
1082
+ "learning_rate": 1.3605723671153187e-05,
1083
+ "loss": 0.4725,
1084
+ "step": 890
1085
+ },
1086
+ {
1087
+ "epoch": 1.96,
1088
+ "learning_rate": 1.22260418755487e-05,
1089
+ "loss": 0.5171,
1090
+ "step": 895
1091
+ },
1092
+ {
1093
+ "epoch": 1.97,
1094
+ "learning_rate": 1.0913247685877927e-05,
1095
+ "loss": 0.5425,
1096
+ "step": 900
1097
+ },
1098
+ {
1099
+ "epoch": 1.98,
1100
+ "learning_rate": 9.66889873022574e-06,
1101
+ "loss": 0.4969,
1102
+ "step": 905
1103
+ },
1104
+ {
1105
+ "epoch": 2.0,
1106
+ "learning_rate": 8.494471426523612e-06,
1107
+ "loss": 0.5235,
1108
+ "step": 910
1109
+ },
1110
+ {
1111
+ "epoch": 2.0,
1112
+ "eval_loss": 0.21034620702266693,
1113
+ "eval_runtime": 15.3022,
1114
+ "eval_samples_per_second": 41.432,
1115
+ "eval_steps_per_second": 5.228,
1116
+ "step": 912
 }
 ],
- "max_steps": 920,
- "num_train_epochs": 2,
- "total_flos": 479993462784000.0,
+ "max_steps": 2736,
+ "num_train_epochs": 6,
+ "total_flos": 951886872576000.0,
 "trial_name": null,
 "trial_params": null
 }
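`trainer_state.json` now carries the training log through step 912 along with the new best checkpoint. A sketch for summarizing it, assuming the entries shown in the hunk above sit under the Trainer's standard `log_history` key:

```python
# Sketch: summarize the updated trainer state. Assumes the entries shown in
# the hunk above live under the Trainer's standard "log_history" key.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

print(state["best_model_checkpoint"], state["best_metric"])  # checkpoint-912, ~0.2103

train_logs = [e for e in state["log_history"] if "loss" in e]  # training (not eval) entries
print(train_logs[-1]["step"], train_logs[-1]["loss"])          # last logged step and loss
```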
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:8dba39e4831d67bb968da088ccd9c21edb2b352696e51f4a43088020c61c6578
+ oid sha256:f1ea3cda2785536f15c20f453c0b9bcc0101903ca9e2ecd4b40b57e318d77ca5
 size 3055