CocoRoF committed
Commit 8d4e96d · verified · 1 Parent(s): a034fb5

Training in progress, step 4280, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9576c675d3783e38d1737c6a59363ea426ec11cb7ca3a89afb856ddcdfd62aeb
+oid sha256:517491d8759942f27beaaca2325a4b97a206a4bc50d01b37f43e9dbfa7a419a1
 size 368988278
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e409661b0f642fac9cad18fe922e5228292aae46d4deddb7b8d039a78c553a02
+oid sha256:149043e2c866159ce43cb83040a8df7ea2f79cc230e8dcb485ab8e3def1116c3
 size 1107079290
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:83484e3d787e0e8d73b7b9884f130016f0b4f3305f1905ab1f975834ec2fd834
+oid sha256:6ffa9145f2f47a4ee9348b70f818bdbc6838c07fe673a33bbd23b4e0aa3723fd
 size 1000
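
The three binary files above are stored through Git LFS, so the diff only touches their pointer files: the version line and size stay the same and only the oid sha256 changes with the new checkpoint. As a minimal sketch (the helper functions below are illustrative and not part of this repository), one could confirm that a locally pulled blob still matches its committed pointer:

import hashlib
from pathlib import Path

def parse_lfs_pointer(pointer_text: str) -> dict:
    # A Git LFS pointer has three lines: the spec version URL, "oid sha256:<hex>", and "size <bytes>".
    fields = dict(line.split(" ", 1) for line in pointer_text.strip().splitlines())
    return {"oid": fields["oid"].removeprefix("sha256:"), "size": int(fields["size"])}

def matches_pointer(blob_path: Path, pointer: dict) -> bool:
    # Hash the local blob in chunks and compare its size and sha256 digest to the pointer.
    digest = hashlib.sha256()
    size = 0
    with blob_path.open("rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
            size += len(chunk)
    return size == pointer["size"] and digest.hexdigest() == pointer["oid"]

# Example with the new model pointer committed above (blob assumed already pulled via `git lfs pull`).
pointer = parse_lfs_pointer(
    "version https://git-lfs.github.com/spec/v1\n"
    "oid sha256:517491d8759942f27beaaca2325a4b97a206a4bc50d01b37f43e9dbfa7a419a1\n"
    "size 368988278"
)
print(matches_pointer(Path("last-checkpoint/model.safetensors"), pointer))
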
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9343884135836715,
+  "epoch": 0.9997956025345286,
   "eval_steps": 2000,
-  "global_step": 4000,
+  "global_step": 4280,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2823,6 +2823,202 @@
       "eval_samples_per_second": 1678.827,
       "eval_steps_per_second": 52.47,
       "step": 4000
+    },
+    {
+      "epoch": 0.9367243846176307,
+      "grad_norm": 46.65625,
+      "learning_rate": 9.981704618557266e-07,
+      "loss": 86.0902,
+      "step": 4010
+    },
+    {
+      "epoch": 0.93906035565159,
+      "grad_norm": 44.65625,
+      "learning_rate": 9.98165899416464e-07,
+      "loss": 85.4592,
+      "step": 4020
+    },
+    {
+      "epoch": 0.9413963266855491,
+      "grad_norm": 44.34375,
+      "learning_rate": 9.981613369772013e-07,
+      "loss": 86.5728,
+      "step": 4030
+    },
+    {
+      "epoch": 0.9437322977195083,
+      "grad_norm": 46.03125,
+      "learning_rate": 9.98156774537939e-07,
+      "loss": 87.2485,
+      "step": 4040
+    },
+    {
+      "epoch": 0.9460682687534675,
+      "grad_norm": 48.28125,
+      "learning_rate": 9.981522120986765e-07,
+      "loss": 87.1623,
+      "step": 4050
+    },
+    {
+      "epoch": 0.9484042397874266,
+      "grad_norm": 47.96875,
+      "learning_rate": 9.981476496594138e-07,
+      "loss": 86.2034,
+      "step": 4060
+    },
+    {
+      "epoch": 0.9507402108213858,
+      "grad_norm": 48.25,
+      "learning_rate": 9.981430872201514e-07,
+      "loss": 86.5078,
+      "step": 4070
+    },
+    {
+      "epoch": 0.953076181855345,
+      "grad_norm": 44.53125,
+      "learning_rate": 9.981385247808888e-07,
+      "loss": 86.3279,
+      "step": 4080
+    },
+    {
+      "epoch": 0.9554121528893041,
+      "grad_norm": 45.6875,
+      "learning_rate": 9.981339623416264e-07,
+      "loss": 86.4747,
+      "step": 4090
+    },
+    {
+      "epoch": 0.9577481239232634,
+      "grad_norm": 47.53125,
+      "learning_rate": 9.981293999023637e-07,
+      "loss": 85.3221,
+      "step": 4100
+    },
+    {
+      "epoch": 0.9600840949572226,
+      "grad_norm": 47.15625,
+      "learning_rate": 9.981248374631013e-07,
+      "loss": 85.7835,
+      "step": 4110
+    },
+    {
+      "epoch": 0.9624200659911817,
+      "grad_norm": 45.96875,
+      "learning_rate": 9.981202750238387e-07,
+      "loss": 85.919,
+      "step": 4120
+    },
+    {
+      "epoch": 0.9647560370251409,
+      "grad_norm": 46.40625,
+      "learning_rate": 9.981157125845762e-07,
+      "loss": 86.6488,
+      "step": 4130
+    },
+    {
+      "epoch": 0.9670920080591001,
+      "grad_norm": 47.8125,
+      "learning_rate": 9.981111501453136e-07,
+      "loss": 86.7465,
+      "step": 4140
+    },
+    {
+      "epoch": 0.9694279790930592,
+      "grad_norm": 50.96875,
+      "learning_rate": 9.981065877060512e-07,
+      "loss": 85.8423,
+      "step": 4150
+    },
+    {
+      "epoch": 0.9717639501270184,
+      "grad_norm": 44.84375,
+      "learning_rate": 9.981020252667885e-07,
+      "loss": 86.4872,
+      "step": 4160
+    },
+    {
+      "epoch": 0.9740999211609777,
+      "grad_norm": 51.46875,
+      "learning_rate": 9.980974628275261e-07,
+      "loss": 86.9111,
+      "step": 4170
+    },
+    {
+      "epoch": 0.9764358921949368,
+      "grad_norm": 46.25,
+      "learning_rate": 9.980929003882635e-07,
+      "loss": 86.4476,
+      "step": 4180
+    },
+    {
+      "epoch": 0.978771863228896,
+      "grad_norm": 47.0625,
+      "learning_rate": 9.98088337949001e-07,
+      "loss": 86.3345,
+      "step": 4190
+    },
+    {
+      "epoch": 0.9811078342628552,
+      "grad_norm": 47.96875,
+      "learning_rate": 9.980837755097384e-07,
+      "loss": 87.4492,
+      "step": 4200
+    },
+    {
+      "epoch": 0.9834438052968143,
+      "grad_norm": 47.53125,
+      "learning_rate": 9.98079213070476e-07,
+      "loss": 87.3175,
+      "step": 4210
+    },
+    {
+      "epoch": 0.9857797763307735,
+      "grad_norm": 47.84375,
+      "learning_rate": 9.980746506312134e-07,
+      "loss": 85.7159,
+      "step": 4220
+    },
+    {
+      "epoch": 0.9881157473647327,
+      "grad_norm": 50.5,
+      "learning_rate": 9.98070088191951e-07,
+      "loss": 85.7232,
+      "step": 4230
+    },
+    {
+      "epoch": 0.9904517183986918,
+      "grad_norm": 47.1875,
+      "learning_rate": 9.980655257526883e-07,
+      "loss": 86.1964,
+      "step": 4240
+    },
+    {
+      "epoch": 0.992787689432651,
+      "grad_norm": 46.15625,
+      "learning_rate": 9.980609633134259e-07,
+      "loss": 86.2977,
+      "step": 4250
+    },
+    {
+      "epoch": 0.9951236604666102,
+      "grad_norm": 44.8125,
+      "learning_rate": 9.980564008741632e-07,
+      "loss": 85.6801,
+      "step": 4260
+    },
+    {
+      "epoch": 0.9974596315005694,
+      "grad_norm": 46.15625,
+      "learning_rate": 9.980518384349008e-07,
+      "loss": 85.8044,
+      "step": 4270
+    },
+    {
+      "epoch": 0.9997956025345286,
+      "grad_norm": 46.75,
+      "learning_rate": 9.980472759956384e-07,
+      "loss": 86.1971,
+      "step": 4280
     }
   ],
   "logging_steps": 10,
@@ -2837,12 +3033,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.10444663109845e+19,
+  "total_flos": 1.1817578952753414e+19,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null