kmnis committed
Commit f789fea · 1 Parent(s): 32fb916

Training in progress, step 5000, checkpoint

last-checkpoint/adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7d78c12c945e5fcb9c639c770a4e4f00430ece4b7fb6cb44feb0d796c2c4df01
+ oid sha256:ffbc24d23529af8af58002eafbfdb64c2a781bf66b0719975cfff0945fd2ff68
  size 19744138
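The entries above and below are Git LFS pointer files: the repository only tracks the three-line pointer (`version`, `oid sha256:…`, `size`), so a checkpoint commit changes the `oid` while the binary blob itself lives in LFS storage. A minimal sketch of reading such a pointer (the helper name is illustrative, not part of this repo):

```python
# Minimal sketch: parse a Git LFS pointer file like the ones in this diff
# (three lines: "version <url>", "oid sha256:<hash>", "size <bytes>").
# parse_lfs_pointer is a hypothetical helper, not something this repo ships.
def parse_lfs_pointer(path: str) -> dict:
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return {
        "version": fields.get("version"),
        "sha256": fields.get("oid", "").removeprefix("sha256:"),
        "size_bytes": int(fields.get("size", "0")),
    }

# Example (values from the pointer above):
# parse_lfs_pointer("last-checkpoint/adapter_model.bin")
# -> {"version": "https://git-lfs.github.com/spec/v1",
#     "sha256": "ffbc24d2...", "size_bytes": 19744138}
```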
last-checkpoint/global_step5000/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b72cafcd98a035b2a3b5a61d62d3e4f3f83f0aeabd67ddc6bd9f4035c3bbf585
+ size 6508458036
last-checkpoint/global_step5000/zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b29ae6ff817af5ebb5e83e17100d02cef546f6f622fc1b7eedac4fb34aed11bd
+ size 29495149
last-checkpoint/global_step5000/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3b2d361ec26a001dc6db84b60aa6477bcc988754b718ff8016a7fa57622c48dc
+ size 6508458036
last-checkpoint/global_step5000/zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9b59806db956177a4b1672e76efb1ff9ede774c227ed9037dde44000da15ebf7
+ size 29495149
last-checkpoint/global_step5000/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:57ae99b71628000244c5471c84eb4a5b785cc97d8a51db8a4abb85674d9b3477
+ size 6508458036
last-checkpoint/global_step5000/zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a45d35aea5ce3610a047a969a6a855504aeac3fdcb34d14405a5e22e00979b2a
+ size 29495149
last-checkpoint/global_step5000/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:47c29d23a8de4915647e2d9db13464e39fc71d866d477be37a86b5728a0d87ad
+ size 6508458036
last-checkpoint/global_step5000/zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ba461e42a784b1b9e5809cf90e977fcff7a4d3819162223c50a1d1c46222d463
+ size 29495149
last-checkpoint/latest CHANGED
@@ -1 +1 @@
- global_step4500
+ global_step5000
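The `latest` file is a tag that names the active DeepSpeed step directory (here bumped from `global_step4500` to `global_step5000`), which holds the per-rank `zero_pp_rank_*_mp_rank_00_model_states.pt` and `*_optim_states.pt` shards added above. A minimal sketch of resolving that tag when resuming from this layout (the helper is hypothetical, not part of DeepSpeed or this repo):

```python
from pathlib import Path

# Hypothetical helper, assuming the checkpoint layout shown in this commit:
# last-checkpoint/latest contains the tag, and the shards live under
# last-checkpoint/<tag>/zero_pp_rank_*_mp_rank_00_*.pt.
def resolve_deepspeed_tag(checkpoint_dir: str) -> Path:
    ckpt = Path(checkpoint_dir)
    tag = (ckpt / "latest").read_text().strip()  # e.g. "global_step5000"
    step_dir = ckpt / tag
    if not step_dir.is_dir():
        raise FileNotFoundError(f"DeepSpeed shard directory missing: {step_dir}")
    return step_dir

# Example: resolve_deepspeed_tag("last-checkpoint")
# -> Path("last-checkpoint/global_step5000")
```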
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:94222b0d851841afa61b6aafcea0bff04917dffca319ac60be1b7888470e4549
+ oid sha256:856870ac2f25ace5a84f27e317129901b4ed17c9dc7320e4029c4b82423d5656
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:6eb7e27f4429243bc102257eec60825acb13cbb47a2667ad973c02b4b2dc57aa
+ oid sha256:ab79bc6f0461a8e513578650799fc17047d99f83ccbabaf66e937b791f79620d
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:0caa8a8996d3cb7ae233c1902bff89eb356bf1170533c424be993eaf5748b7e9
+ oid sha256:ca99fcf04653e8f1e27181a35aa710186a4259c4b45f8b79ef9c5ed1e4d100e2
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:cafc79dc7941f6eb3a98f0b8b14f36db6f3be67545040d9683537528c3545459
+ oid sha256:01cbaf32cc3aaa1caa94c373857611f4262dec7a3d93ef26de5cde9cffbb4972
  size 15024
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 1.897133220910624,
+ "epoch": 2.1079258010118043,
  "eval_steps": 500,
- "global_step": 4500,
+ "global_step": 5000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -2707,13 +2707,313 @@
  "learning_rate": 1e-05,
  "loss": 0.6637,
  "step": 4500
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": 1e-05,
+ "loss": 0.6336,
+ "step": 4510
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": 1e-05,
+ "loss": 0.7105,
+ "step": 4520
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": 1e-05,
+ "loss": 0.6647,
+ "step": 4530
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": 1e-05,
+ "loss": 0.6898,
+ "step": 4540
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": 1e-05,
+ "loss": 0.6854,
+ "step": 4550
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": 1e-05,
+ "loss": 0.6788,
+ "step": 4560
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": 1e-05,
+ "loss": 0.6794,
+ "step": 4570
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": 1e-05,
+ "loss": 0.6683,
+ "step": 4580
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": 1e-05,
+ "loss": 0.641,
+ "step": 4590
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": 1e-05,
+ "loss": 0.6193,
+ "step": 4600
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": 1e-05,
+ "loss": 0.7397,
+ "step": 4610
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": 1e-05,
+ "loss": 0.6435,
+ "step": 4620
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": 1e-05,
+ "loss": 0.6489,
+ "step": 4630
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": 1e-05,
+ "loss": 0.622,
+ "step": 4640
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": 1e-05,
+ "loss": 0.6476,
+ "step": 4650
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": 1e-05,
+ "loss": 0.6058,
+ "step": 4660
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": 1e-05,
+ "loss": 0.6626,
+ "step": 4670
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": 1e-05,
+ "loss": 0.6546,
+ "step": 4680
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": 1e-05,
+ "loss": 0.6935,
+ "step": 4690
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": 1e-05,
+ "loss": 0.6579,
+ "step": 4700
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": 1e-05,
+ "loss": 0.6239,
+ "step": 4710
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": 1e-05,
+ "loss": 0.6109,
+ "step": 4720
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": 1e-05,
+ "loss": 0.6751,
+ "step": 4730
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 1e-05,
+ "loss": 0.6457,
+ "step": 4740
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 1e-05,
+ "loss": 0.6619,
+ "step": 4750
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 1e-05,
+ "loss": 0.6195,
+ "step": 4760
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 1e-05,
+ "loss": 0.6394,
+ "step": 4770
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": 1e-05,
+ "loss": 0.6168,
+ "step": 4780
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": 1e-05,
+ "loss": 0.6742,
+ "step": 4790
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": 1e-05,
+ "loss": 0.6864,
+ "step": 4800
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": 1e-05,
+ "loss": 0.6678,
+ "step": 4810
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": 1e-05,
+ "loss": 0.6452,
+ "step": 4820
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": 1e-05,
+ "loss": 0.6478,
+ "step": 4830
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": 1e-05,
+ "loss": 0.6388,
+ "step": 4840
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": 1e-05,
+ "loss": 0.7307,
+ "step": 4850
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": 1e-05,
+ "loss": 0.6212,
+ "step": 4860
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": 1e-05,
+ "loss": 0.6104,
+ "step": 4870
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": 1e-05,
+ "loss": 0.6343,
+ "step": 4880
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": 1e-05,
+ "loss": 0.564,
+ "step": 4890
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": 1e-05,
+ "loss": 0.6071,
+ "step": 4900
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": 1e-05,
+ "loss": 0.6524,
+ "step": 4910
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": 1e-05,
+ "loss": 0.6491,
+ "step": 4920
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": 1e-05,
+ "loss": 0.6814,
+ "step": 4930
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": 1e-05,
+ "loss": 0.6735,
+ "step": 4940
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": 1e-05,
+ "loss": 0.6388,
+ "step": 4950
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": 1e-05,
+ "loss": 0.6337,
+ "step": 4960
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": 1e-05,
+ "loss": 0.6233,
+ "step": 4970
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": 1e-05,
+ "loss": 0.5838,
+ "step": 4980
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": 1e-05,
+ "loss": 0.5723,
+ "step": 4990
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": 1e-05,
+ "loss": 0.5832,
+ "step": 5000
  }
  ],
  "logging_steps": 10,
  "max_steps": 5000,
  "num_train_epochs": 3,
  "save_steps": 500,
- "total_flos": 1131517606625280.0,
+ "total_flos": 1257157857116160.0,
  "trial_name": null,
  "trial_params": null
  }
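The trainer_state.json diff appends one logged entry per 10 steps (4510 through 5000) and bumps the global step, epoch, and total_flos. A minimal sketch of summarizing the newly logged loss values, assuming the usual Trainer layout in which these entries sit under a "log_history" list:

```python
import json

# Minimal sketch, assuming trainer_state.json keeps the entries shown above
# under a "log_history" key (standard Hugging Face Trainer layout).
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

# Keep only the entries added by this checkpoint (steps after 4500).
new_logs = [e for e in state.get("log_history", [])
            if e.get("step", 0) > 4500 and "loss" in e]

for e in new_logs:
    print(f"step {e['step']:5d}  epoch {e['epoch']:.2f}  loss {e['loss']:.4f}")

# Average training loss over the newly logged window (steps 4510-5000).
if new_logs:
    print("mean loss:", sum(e["loss"] for e in new_logs) / len(new_logs))
```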