learn3r commited on
Commit
e03e6a3
1 Parent(s): e811081

Upload 14 files

Browse files
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80e8cff92b64c2ad4b9132ff437ea1379034d64691dc5452e09f1c4a7d18eb7f
3
+ size 10772000
pytorch_model-00001-of-00002.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:158449422dcb4cb5c95cfa81f690cf9df3e35c9c8c7a7224cf78104faaa9f695
3
  size 9970100058
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f1aef1ddb237ffa7996e90c50681fe1d8ee82dd86aafa7bbc0cd650180a879b
3
  size 9970100058
pytorch_model-00002-of-00002.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:517ae94594dc8958077419959257e136f3e7cb73d326b2ba66bd758e5f80030e
3
  size 1429345899
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99d7b7660e48a5b315907df18f543fa6ede285b3212371296679cdc6e63c26ca
3
  size 1429345899
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:868963bd1d2ded8c6528bf632d66e3b7e9f089d8390bb29d8da29f99476f303b
3
+ size 14575
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:981ba4355835688ed2f12d5a98faa3fea37265615078aad1b3c66eecf5f6afc2
3
+ size 627
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 2.237640380859375,
3
  "best_model_checkpoint": "/exports/eddie/scratch/s1970716/models/summarization/longt5_xl_summ_screen_bp_only_30/checkpoint-14",
4
- "epoch": 14.608695652173914,
5
  "eval_steps": 500,
6
- "global_step": 210,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -624,230 +624,13 @@
624
  "eval_samples_per_second": 0.231,
625
  "eval_steps_per_second": 0.029,
626
  "step": 158
627
- },
628
- {
629
- "epoch": 11.13,
630
- "learning_rate": 0.0005,
631
- "loss": 0.0548,
632
- "step": 160
633
- },
634
- {
635
- "epoch": 11.27,
636
- "learning_rate": 0.0005,
637
- "loss": 0.0477,
638
- "step": 162
639
- },
640
- {
641
- "epoch": 11.41,
642
- "learning_rate": 0.0005,
643
- "loss": 0.052,
644
- "step": 164
645
- },
646
- {
647
- "epoch": 11.55,
648
- "learning_rate": 0.0005,
649
- "loss": 0.053,
650
- "step": 166
651
- },
652
- {
653
- "epoch": 11.69,
654
- "learning_rate": 0.0005,
655
- "loss": 0.0525,
656
- "step": 168
657
- },
658
- {
659
- "epoch": 11.83,
660
- "learning_rate": 0.0005,
661
- "loss": 0.0555,
662
- "step": 170
663
- },
664
- {
665
- "epoch": 11.97,
666
- "learning_rate": 0.0005,
667
- "loss": 0.0557,
668
- "step": 172
669
- },
670
- {
671
- "epoch": 11.97,
672
- "eval_gen_len": 270.9674556213018,
673
- "eval_loss": 3.3825831413269043,
674
- "eval_rouge1": 42.3089,
675
- "eval_rouge2": 18.2735,
676
- "eval_rougeL": 29.0356,
677
- "eval_rougeLsum": 40.4154,
678
- "eval_runtime": 1704.6973,
679
- "eval_samples_per_second": 0.198,
680
- "eval_steps_per_second": 0.025,
681
- "step": 172
682
- },
683
- {
684
- "epoch": 12.1,
685
- "learning_rate": 0.0005,
686
- "loss": 0.0513,
687
- "step": 174
688
- },
689
- {
690
- "epoch": 12.24,
691
- "learning_rate": 0.0005,
692
- "loss": 0.0419,
693
- "step": 176
694
- },
695
- {
696
- "epoch": 12.38,
697
- "learning_rate": 0.0005,
698
- "loss": 0.0525,
699
- "step": 178
700
- },
701
- {
702
- "epoch": 12.52,
703
- "learning_rate": 0.0005,
704
- "loss": 0.0535,
705
- "step": 180
706
- },
707
- {
708
- "epoch": 12.66,
709
- "learning_rate": 0.0005,
710
- "loss": 0.0611,
711
- "step": 182
712
- },
713
- {
714
- "epoch": 12.8,
715
- "learning_rate": 0.0005,
716
- "loss": 0.0444,
717
- "step": 184
718
- },
719
- {
720
- "epoch": 12.94,
721
- "learning_rate": 0.0005,
722
- "loss": 0.0542,
723
- "step": 186
724
- },
725
- {
726
- "epoch": 12.94,
727
- "eval_gen_len": 186.73076923076923,
728
- "eval_loss": 3.4408490657806396,
729
- "eval_rouge1": 40.7691,
730
- "eval_rouge2": 16.529,
731
- "eval_rougeL": 28.3999,
732
- "eval_rougeLsum": 38.9723,
733
- "eval_runtime": 1525.6668,
734
- "eval_samples_per_second": 0.222,
735
- "eval_steps_per_second": 0.028,
736
- "step": 186
737
- },
738
- {
739
- "epoch": 13.08,
740
- "learning_rate": 0.0005,
741
- "loss": 0.0602,
742
- "step": 188
743
- },
744
- {
745
- "epoch": 13.22,
746
- "learning_rate": 0.0005,
747
- "loss": 0.0438,
748
- "step": 190
749
- },
750
- {
751
- "epoch": 13.36,
752
- "learning_rate": 0.0005,
753
- "loss": 0.0503,
754
- "step": 192
755
- },
756
- {
757
- "epoch": 13.5,
758
- "learning_rate": 0.0005,
759
- "loss": 0.046,
760
- "step": 194
761
- },
762
- {
763
- "epoch": 13.63,
764
- "learning_rate": 0.0005,
765
- "loss": 0.0368,
766
- "step": 196
767
- },
768
- {
769
- "epoch": 13.77,
770
- "learning_rate": 0.0005,
771
- "loss": 0.0572,
772
- "step": 198
773
- },
774
- {
775
- "epoch": 13.91,
776
- "learning_rate": 0.0005,
777
- "loss": 0.0596,
778
- "step": 200
779
- },
780
- {
781
- "epoch": 13.98,
782
- "eval_gen_len": 398.4704142011834,
783
- "eval_loss": 3.525272846221924,
784
- "eval_rouge1": 37.0037,
785
- "eval_rouge2": 15.9098,
786
- "eval_rougeL": 25.2808,
787
- "eval_rougeLsum": 35.3868,
788
- "eval_runtime": 1778.3289,
789
- "eval_samples_per_second": 0.19,
790
- "eval_steps_per_second": 0.024,
791
- "step": 201
792
- },
793
- {
794
- "epoch": 14.05,
795
- "learning_rate": 0.0005,
796
- "loss": 0.0434,
797
- "step": 202
798
- },
799
- {
800
- "epoch": 14.19,
801
- "learning_rate": 0.0005,
802
- "loss": 0.0453,
803
- "step": 204
804
- },
805
- {
806
- "epoch": 14.33,
807
- "learning_rate": 0.0005,
808
- "loss": 0.0453,
809
- "step": 206
810
- },
811
- {
812
- "epoch": 14.47,
813
- "learning_rate": 0.0005,
814
- "loss": 0.0586,
815
- "step": 208
816
- },
817
- {
818
- "epoch": 14.61,
819
- "learning_rate": 0.0005,
820
- "loss": 0.0385,
821
- "step": 210
822
- },
823
- {
824
- "epoch": 14.61,
825
- "eval_gen_len": 499.31065088757396,
826
- "eval_loss": 3.498972177505493,
827
- "eval_rouge1": 32.5815,
828
- "eval_rouge2": 14.2951,
829
- "eval_rougeL": 22.4501,
830
- "eval_rougeLsum": 31.2928,
831
- "eval_runtime": 1779.9602,
832
- "eval_samples_per_second": 0.19,
833
- "eval_steps_per_second": 0.024,
834
- "step": 210
835
- },
836
- {
837
- "epoch": 14.61,
838
- "step": 210,
839
- "total_flos": 3.6715371967648973e+18,
840
- "train_loss": 0.13047422234501158,
841
- "train_runtime": 78176.4332,
842
- "train_samples_per_second": 0.705,
843
- "train_steps_per_second": 0.003
844
  }
845
  ],
846
  "logging_steps": 2,
847
  "max_steps": 210,
848
  "num_train_epochs": 15,
849
  "save_steps": 500,
850
- "total_flos": 3.6715371967648973e+18,
851
  "trial_name": null,
852
  "trial_params": null
853
  }
 
1
  {
2
  "best_metric": 2.237640380859375,
3
  "best_model_checkpoint": "/exports/eddie/scratch/s1970716/models/summarization/longt5_xl_summ_screen_bp_only_30/checkpoint-14",
4
+ "epoch": 10.991304347826087,
5
  "eval_steps": 500,
6
+ "global_step": 158,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
624
  "eval_samples_per_second": 0.231,
625
  "eval_steps_per_second": 0.029,
626
  "step": 158
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
627
  }
628
  ],
629
  "logging_steps": 2,
630
  "max_steps": 210,
631
  "num_train_epochs": 15,
632
  "save_steps": 500,
633
+ "total_flos": 2.764359930466935e+18,
634
  "trial_name": null,
635
  "trial_params": null
636
  }