Romain-XV commited on
Commit
d6c97a8
·
verified ·
1 Parent(s): 6f12732

Training in progress, step 1920, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:487f4c4b5e9c2d48058b66824c97cb3a7c01a4b25f9347c0d95bcef11ba8367e
3
  size 912336848
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91b1b0eb8821ed3e8ef53157ad0fe2a6d4a73b217edc43a13c527825df62dece
3
  size 912336848
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a0390f2ea7160e746162b460c270e5b6d00a78481b1dbd170ed7add96b0dc6d
3
  size 463916756
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af1ce888960052569c2e8cf3fbac3a73808b7588b56f53738a8dad72d923e0df
3
  size 463916756
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ede6b7706e49fabc3980bd229c5ca578368c3b0f83d4d38e6e2b8aae193ae907
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:996d6df6cc333969754208420b894da3d6ad34807108aff10aacc8b6dc400f6c
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4f9770b7699d99c609c7eabb1512ef1aab84731a6105983404dc679f5faf983
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d9cacaadf8439d83df6534e66fde09e58b74c250800597efcb7ccff892221c0
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.9712321758270264,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-1800",
4
- "epoch": 0.37274798094843653,
5
  "eval_steps": 150,
6
- "global_step": 1800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -12711,6 +12711,846 @@
12711
  "eval_samples_per_second": 4.32,
12712
  "eval_steps_per_second": 2.16,
12713
  "step": 1800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12714
  }
12715
  ],
12716
  "logging_steps": 1,
@@ -12734,12 +13574,12 @@
12734
  "should_evaluate": false,
12735
  "should_log": false,
12736
  "should_save": true,
12737
- "should_training_stop": false
12738
  },
12739
  "attributes": {}
12740
  }
12741
  },
12742
- "total_flos": 2.116960093344891e+18,
12743
  "train_batch_size": 2,
12744
  "trial_name": null,
12745
  "trial_params": null
 
1
  {
2
  "best_metric": 0.9712321758270264,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-1800",
4
+ "epoch": 0.39759784634499895,
5
  "eval_steps": 150,
6
+ "global_step": 1920,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
12711
  "eval_samples_per_second": 4.32,
12712
  "eval_steps_per_second": 2.16,
12713
  "step": 1800
12714
+ },
12715
+ {
12716
+ "epoch": 0.37295506316007454,
12717
+ "grad_norm": 7.738208770751953,
12718
+ "learning_rate": 1.909457351846866e-06,
12719
+ "loss": 3.3165,
12720
+ "step": 1801
12721
+ },
12722
+ {
12723
+ "epoch": 0.37316214537171255,
12724
+ "grad_norm": 9.508034706115723,
12725
+ "learning_rate": 1.8776008553690438e-06,
12726
+ "loss": 2.879,
12727
+ "step": 1802
12728
+ },
12729
+ {
12730
+ "epoch": 0.3733692275833506,
12731
+ "grad_norm": 8.288263320922852,
12732
+ "learning_rate": 1.8460098201022903e-06,
12733
+ "loss": 3.8177,
12734
+ "step": 1803
12735
+ },
12736
+ {
12737
+ "epoch": 0.3735763097949886,
12738
+ "grad_norm": 6.952064037322998,
12739
+ "learning_rate": 1.8146843315132833e-06,
12740
+ "loss": 3.7097,
12741
+ "step": 1804
12742
+ },
12743
+ {
12744
+ "epoch": 0.37378339200662664,
12745
+ "grad_norm": 8.130983352661133,
12746
+ "learning_rate": 1.783624474350276e-06,
12747
+ "loss": 3.8691,
12748
+ "step": 1805
12749
+ },
12750
+ {
12751
+ "epoch": 0.37399047421826465,
12752
+ "grad_norm": 7.877700328826904,
12753
+ "learning_rate": 1.7528303326428742e-06,
12754
+ "loss": 4.2351,
12755
+ "step": 1806
12756
+ },
12757
+ {
12758
+ "epoch": 0.37419755642990266,
12759
+ "grad_norm": 8.027643203735352,
12760
+ "learning_rate": 1.7223019897018245e-06,
12761
+ "loss": 3.5811,
12762
+ "step": 1807
12763
+ },
12764
+ {
12765
+ "epoch": 0.37440463864154067,
12766
+ "grad_norm": 8.3945894241333,
12767
+ "learning_rate": 1.6920395281187717e-06,
12768
+ "loss": 3.5923,
12769
+ "step": 1808
12770
+ },
12771
+ {
12772
+ "epoch": 0.37461172085317873,
12773
+ "grad_norm": 9.124868392944336,
12774
+ "learning_rate": 1.6620430297660694e-06,
12775
+ "loss": 3.3783,
12776
+ "step": 1809
12777
+ },
12778
+ {
12779
+ "epoch": 0.37481880306481674,
12780
+ "grad_norm": 10.345230102539062,
12781
+ "learning_rate": 1.6323125757964797e-06,
12782
+ "loss": 2.8082,
12783
+ "step": 1810
12784
+ },
12785
+ {
12786
+ "epoch": 0.37502588527645475,
12787
+ "grad_norm": 9.551642417907715,
12788
+ "learning_rate": 1.6028482466430628e-06,
12789
+ "loss": 3.8794,
12790
+ "step": 1811
12791
+ },
12792
+ {
12793
+ "epoch": 0.37523296748809276,
12794
+ "grad_norm": 7.6390156745910645,
12795
+ "learning_rate": 1.573650122018866e-06,
12796
+ "loss": 4.59,
12797
+ "step": 1812
12798
+ },
12799
+ {
12800
+ "epoch": 0.37544004969973077,
12801
+ "grad_norm": 6.970566272735596,
12802
+ "learning_rate": 1.5447182809167571e-06,
12803
+ "loss": 3.4794,
12804
+ "step": 1813
12805
+ },
12806
+ {
12807
+ "epoch": 0.37564713191136884,
12808
+ "grad_norm": 7.257846832275391,
12809
+ "learning_rate": 1.5160528016091914e-06,
12810
+ "loss": 3.4807,
12811
+ "step": 1814
12812
+ },
12813
+ {
12814
+ "epoch": 0.37585421412300685,
12815
+ "grad_norm": 6.892467021942139,
12816
+ "learning_rate": 1.4876537616480334e-06,
12817
+ "loss": 3.3787,
12818
+ "step": 1815
12819
+ },
12820
+ {
12821
+ "epoch": 0.37606129633464486,
12822
+ "grad_norm": 7.427321434020996,
12823
+ "learning_rate": 1.4595212378642697e-06,
12824
+ "loss": 4.2577,
12825
+ "step": 1816
12826
+ },
12827
+ {
12828
+ "epoch": 0.37626837854628287,
12829
+ "grad_norm": 6.533102035522461,
12830
+ "learning_rate": 1.4316553063678851e-06,
12831
+ "loss": 4.0092,
12832
+ "step": 1817
12833
+ },
12834
+ {
12835
+ "epoch": 0.3764754607579209,
12836
+ "grad_norm": 9.07302474975586,
12837
+ "learning_rate": 1.4040560425476411e-06,
12838
+ "loss": 3.955,
12839
+ "step": 1818
12840
+ },
12841
+ {
12842
+ "epoch": 0.37668254296955894,
12843
+ "grad_norm": 8.967131614685059,
12844
+ "learning_rate": 1.3767235210708107e-06,
12845
+ "loss": 3.5007,
12846
+ "step": 1819
12847
+ },
12848
+ {
12849
+ "epoch": 0.37688962518119695,
12850
+ "grad_norm": 7.1962761878967285,
12851
+ "learning_rate": 1.349657815883032e-06,
12852
+ "loss": 3.6339,
12853
+ "step": 1820
12854
+ },
12855
+ {
12856
+ "epoch": 0.37709670739283496,
12857
+ "grad_norm": 8.466217041015625,
12858
+ "learning_rate": 1.322859000208132e-06,
12859
+ "loss": 3.6523,
12860
+ "step": 1821
12861
+ },
12862
+ {
12863
+ "epoch": 0.37730378960447297,
12864
+ "grad_norm": 11.36102294921875,
12865
+ "learning_rate": 1.2963271465478488e-06,
12866
+ "loss": 3.1206,
12867
+ "step": 1822
12868
+ },
12869
+ {
12870
+ "epoch": 0.377510871816111,
12871
+ "grad_norm": 7.7683539390563965,
12872
+ "learning_rate": 1.270062326681698e-06,
12873
+ "loss": 4.508,
12874
+ "step": 1823
12875
+ },
12876
+ {
12877
+ "epoch": 0.377717954027749,
12878
+ "grad_norm": 6.693579196929932,
12879
+ "learning_rate": 1.244064611666751e-06,
12880
+ "loss": 4.529,
12881
+ "step": 1824
12882
+ },
12883
+ {
12884
+ "epoch": 0.37792503623938706,
12885
+ "grad_norm": 6.704649448394775,
12886
+ "learning_rate": 1.2183340718374681e-06,
12887
+ "loss": 4.2927,
12888
+ "step": 1825
12889
+ },
12890
+ {
12891
+ "epoch": 0.37813211845102507,
12892
+ "grad_norm": 7.658962726593018,
12893
+ "learning_rate": 1.1928707768054658e-06,
12894
+ "loss": 4.7976,
12895
+ "step": 1826
12896
+ },
12897
+ {
12898
+ "epoch": 0.3783392006626631,
12899
+ "grad_norm": 8.829057693481445,
12900
+ "learning_rate": 1.1676747954593837e-06,
12901
+ "loss": 4.4687,
12902
+ "step": 1827
12903
+ },
12904
+ {
12905
+ "epoch": 0.3785462828743011,
12906
+ "grad_norm": 8.466927528381348,
12907
+ "learning_rate": 1.14274619596465e-06,
12908
+ "loss": 3.5714,
12909
+ "step": 1828
12910
+ },
12911
+ {
12912
+ "epoch": 0.3787533650859391,
12913
+ "grad_norm": 9.557503700256348,
12914
+ "learning_rate": 1.1180850457633175e-06,
12915
+ "loss": 4.2684,
12916
+ "step": 1829
12917
+ },
12918
+ {
12919
+ "epoch": 0.37896044729757716,
12920
+ "grad_norm": 7.801200866699219,
12921
+ "learning_rate": 1.0936914115738717e-06,
12922
+ "loss": 4.8398,
12923
+ "step": 1830
12924
+ },
12925
+ {
12926
+ "epoch": 0.3791675295092152,
12927
+ "grad_norm": 7.245351314544678,
12928
+ "learning_rate": 1.0695653593910782e-06,
12929
+ "loss": 3.2947,
12930
+ "step": 1831
12931
+ },
12932
+ {
12933
+ "epoch": 0.3793746117208532,
12934
+ "grad_norm": 8.5117826461792,
12935
+ "learning_rate": 1.0457069544857813e-06,
12936
+ "loss": 3.9429,
12937
+ "step": 1832
12938
+ },
12939
+ {
12940
+ "epoch": 0.3795816939324912,
12941
+ "grad_norm": 26.09311294555664,
12942
+ "learning_rate": 1.0221162614047042e-06,
12943
+ "loss": 3.4436,
12944
+ "step": 1833
12945
+ },
12946
+ {
12947
+ "epoch": 0.3797887761441292,
12948
+ "grad_norm": 8.763874053955078,
12949
+ "learning_rate": 9.987933439703279e-07,
12950
+ "loss": 3.2206,
12951
+ "step": 1834
12952
+ },
12953
+ {
12954
+ "epoch": 0.3799958583557672,
12955
+ "grad_norm": 6.694812297821045,
12956
+ "learning_rate": 9.75738265280679e-07,
12957
+ "loss": 3.8137,
12958
+ "step": 1835
12959
+ },
12960
+ {
12961
+ "epoch": 0.3802029405674053,
12962
+ "grad_norm": 8.24753475189209,
12963
+ "learning_rate": 9.529510877091752e-07,
12964
+ "loss": 2.9927,
12965
+ "step": 1836
12966
+ },
12967
+ {
12968
+ "epoch": 0.3804100227790433,
12969
+ "grad_norm": 7.92348575592041,
12970
+ "learning_rate": 9.304318729044359e-07,
12971
+ "loss": 3.4538,
12972
+ "step": 1837
12973
+ },
12974
+ {
12975
+ "epoch": 0.3806171049906813,
12976
+ "grad_norm": 8.349137306213379,
12977
+ "learning_rate": 9.081806817901606e-07,
12978
+ "loss": 3.8553,
12979
+ "step": 1838
12980
+ },
12981
+ {
12982
+ "epoch": 0.3808241872023193,
12983
+ "grad_norm": 8.452718734741211,
12984
+ "learning_rate": 8.861975745649176e-07,
12985
+ "loss": 3.6945,
12986
+ "step": 1839
12987
+ },
12988
+ {
12989
+ "epoch": 0.3810312694139573,
12990
+ "grad_norm": 7.854761600494385,
12991
+ "learning_rate": 8.644826107019888e-07,
12992
+ "loss": 5.3701,
12993
+ "step": 1840
12994
+ },
12995
+ {
12996
+ "epoch": 0.3812383516255954,
12997
+ "grad_norm": 6.596001148223877,
12998
+ "learning_rate": 8.430358489492474e-07,
12999
+ "loss": 4.0305,
13000
+ "step": 1841
13001
+ },
13002
+ {
13003
+ "epoch": 0.3814454338372334,
13004
+ "grad_norm": 8.507204055786133,
13005
+ "learning_rate": 8.218573473289359e-07,
13006
+ "loss": 4.3795,
13007
+ "step": 1842
13008
+ },
13009
+ {
13010
+ "epoch": 0.3816525160488714,
13011
+ "grad_norm": 7.512143135070801,
13012
+ "learning_rate": 8.009471631375775e-07,
13013
+ "loss": 3.3766,
13014
+ "step": 1843
13015
+ },
13016
+ {
13017
+ "epoch": 0.3818595982605094,
13018
+ "grad_norm": 7.501784801483154,
13019
+ "learning_rate": 7.803053529457538e-07,
13020
+ "loss": 3.542,
13021
+ "step": 1844
13022
+ },
13023
+ {
13024
+ "epoch": 0.3820666804721474,
13025
+ "grad_norm": 7.17270040512085,
13026
+ "learning_rate": 7.599319725980048e-07,
13027
+ "loss": 4.8348,
13028
+ "step": 1845
13029
+ },
13030
+ {
13031
+ "epoch": 0.3822737626837855,
13032
+ "grad_norm": 8.266033172607422,
13033
+ "learning_rate": 7.398270772126736e-07,
13034
+ "loss": 3.6901,
13035
+ "step": 1846
13036
+ },
13037
+ {
13038
+ "epoch": 0.3824808448954235,
13039
+ "grad_norm": 7.612104415893555,
13040
+ "learning_rate": 7.19990721181707e-07,
13041
+ "loss": 3.8009,
13042
+ "step": 1847
13043
+ },
13044
+ {
13045
+ "epoch": 0.3826879271070615,
13046
+ "grad_norm": 8.199748992919922,
13047
+ "learning_rate": 7.004229581705546e-07,
13048
+ "loss": 4.2672,
13049
+ "step": 1848
13050
+ },
13051
+ {
13052
+ "epoch": 0.3828950093186995,
13053
+ "grad_norm": 9.074591636657715,
13054
+ "learning_rate": 6.811238411180365e-07,
13055
+ "loss": 5.1188,
13056
+ "step": 1849
13057
+ },
13058
+ {
13059
+ "epoch": 0.38310209153033753,
13060
+ "grad_norm": 6.480962753295898,
13061
+ "learning_rate": 6.620934222361319e-07,
13062
+ "loss": 3.6043,
13063
+ "step": 1850
13064
+ },
13065
+ {
13066
+ "epoch": 0.38330917374197554,
13067
+ "grad_norm": 7.479844093322754,
13068
+ "learning_rate": 6.433317530099237e-07,
13069
+ "loss": 4.7015,
13070
+ "step": 1851
13071
+ },
13072
+ {
13073
+ "epoch": 0.3835162559536136,
13074
+ "grad_norm": 6.926044464111328,
13075
+ "learning_rate": 6.248388841973762e-07,
13076
+ "loss": 3.9299,
13077
+ "step": 1852
13078
+ },
13079
+ {
13080
+ "epoch": 0.3837233381652516,
13081
+ "grad_norm": 8.056866645812988,
13082
+ "learning_rate": 6.066148658292914e-07,
13083
+ "loss": 4.4821,
13084
+ "step": 1853
13085
+ },
13086
+ {
13087
+ "epoch": 0.3839304203768896,
13088
+ "grad_norm": 8.139278411865234,
13089
+ "learning_rate": 5.886597472090638e-07,
13090
+ "loss": 4.6499,
13091
+ "step": 1854
13092
+ },
13093
+ {
13094
+ "epoch": 0.38413750258852764,
13095
+ "grad_norm": 12.200084686279297,
13096
+ "learning_rate": 5.709735769126478e-07,
13097
+ "loss": 4.5629,
13098
+ "step": 1855
13099
+ },
13100
+ {
13101
+ "epoch": 0.38434458480016565,
13102
+ "grad_norm": 11.336113929748535,
13103
+ "learning_rate": 5.535564027883577e-07,
13104
+ "loss": 4.3053,
13105
+ "step": 1856
13106
+ },
13107
+ {
13108
+ "epoch": 0.3845516670118037,
13109
+ "grad_norm": 7.85932731628418,
13110
+ "learning_rate": 5.364082719567787e-07,
13111
+ "loss": 4.3562,
13112
+ "step": 1857
13113
+ },
13114
+ {
13115
+ "epoch": 0.3847587492234417,
13116
+ "grad_norm": 7.8194427490234375,
13117
+ "learning_rate": 5.195292308106115e-07,
13118
+ "loss": 4.519,
13119
+ "step": 1858
13120
+ },
13121
+ {
13122
+ "epoch": 0.38496583143507973,
13123
+ "grad_norm": 9.013519287109375,
13124
+ "learning_rate": 5.029193250145503e-07,
13125
+ "loss": 3.9506,
13126
+ "step": 1859
13127
+ },
13128
+ {
13129
+ "epoch": 0.38517291364671774,
13130
+ "grad_norm": 8.887256622314453,
13131
+ "learning_rate": 4.865785995052052e-07,
13132
+ "loss": 4.1928,
13133
+ "step": 1860
13134
+ },
13135
+ {
13136
+ "epoch": 0.38537999585835575,
13137
+ "grad_norm": 9.979034423828125,
13138
+ "learning_rate": 4.7050709849091325e-07,
13139
+ "loss": 4.1995,
13140
+ "step": 1861
13141
+ },
13142
+ {
13143
+ "epoch": 0.38558707806999376,
13144
+ "grad_norm": 8.198508262634277,
13145
+ "learning_rate": 4.547048654516495e-07,
13146
+ "loss": 4.5163,
13147
+ "step": 1862
13148
+ },
13149
+ {
13150
+ "epoch": 0.3857941602816318,
13151
+ "grad_norm": 7.107065677642822,
13152
+ "learning_rate": 4.391719431389163e-07,
13153
+ "loss": 4.0739,
13154
+ "step": 1863
13155
+ },
13156
+ {
13157
+ "epoch": 0.38600124249326984,
13158
+ "grad_norm": 7.956059455871582,
13159
+ "learning_rate": 4.2390837357563217e-07,
13160
+ "loss": 3.6853,
13161
+ "step": 1864
13162
+ },
13163
+ {
13164
+ "epoch": 0.38620832470490785,
13165
+ "grad_norm": 7.028172969818115,
13166
+ "learning_rate": 4.089141980559763e-07,
13167
+ "loss": 4.2296,
13168
+ "step": 1865
13169
+ },
13170
+ {
13171
+ "epoch": 0.38641540691654586,
13172
+ "grad_norm": 8.081233978271484,
13173
+ "learning_rate": 3.941894571453442e-07,
13174
+ "loss": 3.9931,
13175
+ "step": 1866
13176
+ },
13177
+ {
13178
+ "epoch": 0.38662248912818387,
13179
+ "grad_norm": 7.969577789306641,
13180
+ "learning_rate": 3.797341906801588e-07,
13181
+ "loss": 3.5845,
13182
+ "step": 1867
13183
+ },
13184
+ {
13185
+ "epoch": 0.38682957133982193,
13186
+ "grad_norm": 9.258206367492676,
13187
+ "learning_rate": 3.655484377678375e-07,
13188
+ "loss": 3.5585,
13189
+ "step": 1868
13190
+ },
13191
+ {
13192
+ "epoch": 0.38703665355145994,
13193
+ "grad_norm": 8.422988891601562,
13194
+ "learning_rate": 3.516322367866476e-07,
13195
+ "loss": 3.89,
13196
+ "step": 1869
13197
+ },
13198
+ {
13199
+ "epoch": 0.38724373576309795,
13200
+ "grad_norm": 7.004971981048584,
13201
+ "learning_rate": 3.379856253855951e-07,
13202
+ "loss": 3.4266,
13203
+ "step": 1870
13204
+ },
13205
+ {
13206
+ "epoch": 0.38745081797473596,
13207
+ "grad_norm": 9.541719436645508,
13208
+ "learning_rate": 3.2460864048434735e-07,
13209
+ "loss": 3.2493,
13210
+ "step": 1871
13211
+ },
13212
+ {
13213
+ "epoch": 0.38765790018637397,
13214
+ "grad_norm": 7.930725574493408,
13215
+ "learning_rate": 3.115013182731219e-07,
13216
+ "loss": 3.0596,
13217
+ "step": 1872
13218
+ },
13219
+ {
13220
+ "epoch": 0.38786498239801204,
13221
+ "grad_norm": 10.04166316986084,
13222
+ "learning_rate": 2.986636942125753e-07,
13223
+ "loss": 3.9377,
13224
+ "step": 1873
13225
+ },
13226
+ {
13227
+ "epoch": 0.38807206460965005,
13228
+ "grad_norm": 8.160487174987793,
13229
+ "learning_rate": 2.860958030337368e-07,
13230
+ "loss": 3.7294,
13231
+ "step": 1874
13232
+ },
13233
+ {
13234
+ "epoch": 0.38827914682128806,
13235
+ "grad_norm": 8.443591117858887,
13236
+ "learning_rate": 2.73797678737886e-07,
13237
+ "loss": 4.0411,
13238
+ "step": 1875
13239
+ },
13240
+ {
13241
+ "epoch": 0.38848622903292607,
13242
+ "grad_norm": 7.043831825256348,
13243
+ "learning_rate": 2.61769354596475e-07,
13244
+ "loss": 5.0244,
13245
+ "step": 1876
13246
+ },
13247
+ {
13248
+ "epoch": 0.3886933112445641,
13249
+ "grad_norm": 7.078221797943115,
13250
+ "learning_rate": 2.500108631510512e-07,
13251
+ "loss": 3.8743,
13252
+ "step": 1877
13253
+ },
13254
+ {
13255
+ "epoch": 0.3889003934562021,
13256
+ "grad_norm": 7.39813756942749,
13257
+ "learning_rate": 2.385222362131345e-07,
13258
+ "loss": 2.9238,
13259
+ "step": 1878
13260
+ },
13261
+ {
13262
+ "epoch": 0.38910747566784015,
13263
+ "grad_norm": 8.55196475982666,
13264
+ "learning_rate": 2.2730350486416253e-07,
13265
+ "loss": 4.0762,
13266
+ "step": 1879
13267
+ },
13268
+ {
13269
+ "epoch": 0.38931455787947816,
13270
+ "grad_norm": 7.475944519042969,
13271
+ "learning_rate": 2.163546994553789e-07,
13272
+ "loss": 4.8681,
13273
+ "step": 1880
13274
+ },
13275
+ {
13276
+ "epoch": 0.3895216400911162,
13277
+ "grad_norm": 19.760482788085938,
13278
+ "learning_rate": 2.056758496077893e-07,
13279
+ "loss": 4.3132,
13280
+ "step": 1881
13281
+ },
13282
+ {
13283
+ "epoch": 0.3897287223027542,
13284
+ "grad_norm": 7.659260272979736,
13285
+ "learning_rate": 1.9526698421206136e-07,
13286
+ "loss": 3.8448,
13287
+ "step": 1882
13288
+ },
13289
+ {
13290
+ "epoch": 0.3899358045143922,
13291
+ "grad_norm": 11.384870529174805,
13292
+ "learning_rate": 1.8512813142840257e-07,
13293
+ "loss": 4.5382,
13294
+ "step": 1883
13295
+ },
13296
+ {
13297
+ "epoch": 0.39014288672603026,
13298
+ "grad_norm": 7.478827953338623,
13299
+ "learning_rate": 1.752593186865714e-07,
13300
+ "loss": 3.6739,
13301
+ "step": 1884
13302
+ },
13303
+ {
13304
+ "epoch": 0.39034996893766827,
13305
+ "grad_norm": 7.702869892120361,
13306
+ "learning_rate": 1.6566057268574408e-07,
13307
+ "loss": 4.1893,
13308
+ "step": 1885
13309
+ },
13310
+ {
13311
+ "epoch": 0.3905570511493063,
13312
+ "grad_norm": 7.367177963256836,
13313
+ "learning_rate": 1.5633191939444791e-07,
13314
+ "loss": 3.7774,
13315
+ "step": 1886
13316
+ },
13317
+ {
13318
+ "epoch": 0.3907641333609443,
13319
+ "grad_norm": 24.913116455078125,
13320
+ "learning_rate": 1.4727338405051693e-07,
13321
+ "loss": 3.9881,
13322
+ "step": 1887
13323
+ },
13324
+ {
13325
+ "epoch": 0.3909712155725823,
13326
+ "grad_norm": 7.524211883544922,
13327
+ "learning_rate": 1.384849911609809e-07,
13328
+ "loss": 4.513,
13329
+ "step": 1888
13330
+ },
13331
+ {
13332
+ "epoch": 0.3911782977842203,
13333
+ "grad_norm": 8.035744667053223,
13334
+ "learning_rate": 1.2996676450203193e-07,
13335
+ "loss": 3.6691,
13336
+ "step": 1889
13337
+ },
13338
+ {
13339
+ "epoch": 0.3913853799958584,
13340
+ "grad_norm": 8.68524169921875,
13341
+ "learning_rate": 1.2171872711895794e-07,
13342
+ "loss": 4.1598,
13343
+ "step": 1890
13344
+ },
13345
+ {
13346
+ "epoch": 0.3915924622074964,
13347
+ "grad_norm": 8.163741111755371,
13348
+ "learning_rate": 1.1374090132608706e-07,
13349
+ "loss": 3.3935,
13350
+ "step": 1891
13351
+ },
13352
+ {
13353
+ "epoch": 0.3917995444191344,
13354
+ "grad_norm": 8.107989311218262,
13355
+ "learning_rate": 1.0603330870668782e-07,
13356
+ "loss": 3.6385,
13357
+ "step": 1892
13358
+ },
13359
+ {
13360
+ "epoch": 0.3920066266307724,
13361
+ "grad_norm": 6.142742156982422,
13362
+ "learning_rate": 9.859597011294686e-08,
13363
+ "loss": 3.6153,
13364
+ "step": 1893
13365
+ },
13366
+ {
13367
+ "epoch": 0.3922137088424104,
13368
+ "grad_norm": 9.46705150604248,
13369
+ "learning_rate": 9.142890566591344e-08,
13370
+ "loss": 4.1681,
13371
+ "step": 1894
13372
+ },
13373
+ {
13374
+ "epoch": 0.3924207910540485,
13375
+ "grad_norm": 8.04094409942627,
13376
+ "learning_rate": 8.453213475543287e-08,
13377
+ "loss": 4.5136,
13378
+ "step": 1895
13379
+ },
13380
+ {
13381
+ "epoch": 0.3926278732656865,
13382
+ "grad_norm": 9.353519439697266,
13383
+ "learning_rate": 7.790567604007981e-08,
13384
+ "loss": 5.6024,
13385
+ "step": 1896
13386
+ },
13387
+ {
13388
+ "epoch": 0.3928349554773245,
13389
+ "grad_norm": 8.01060676574707,
13390
+ "learning_rate": 7.154954744713616e-08,
13391
+ "loss": 4.1784,
13392
+ "step": 1897
13393
+ },
13394
+ {
13395
+ "epoch": 0.3930420376889625,
13396
+ "grad_norm": 7.64965295791626,
13397
+ "learning_rate": 6.546376617252437e-08,
13398
+ "loss": 4.1038,
13399
+ "step": 1898
13400
+ },
13401
+ {
13402
+ "epoch": 0.3932491199006005,
13403
+ "grad_norm": 9.069695472717285,
13404
+ "learning_rate": 5.964834868078529e-08,
13405
+ "loss": 3.8891,
13406
+ "step": 1899
13407
+ },
13408
+ {
13409
+ "epoch": 0.3934562021122386,
13410
+ "grad_norm": 7.1893110275268555,
13411
+ "learning_rate": 5.4103310704989305e-08,
13412
+ "loss": 4.2647,
13413
+ "step": 1900
13414
+ },
13415
+ {
13416
+ "epoch": 0.3936632843238766,
13417
+ "grad_norm": 7.455760478973389,
13418
+ "learning_rate": 4.8828667246736406e-08,
13419
+ "loss": 4.6849,
13420
+ "step": 1901
13421
+ },
13422
+ {
13423
+ "epoch": 0.3938703665355146,
13424
+ "grad_norm": 8.376263618469238,
13425
+ "learning_rate": 4.382443257610058e-08,
13426
+ "loss": 3.1997,
13427
+ "step": 1902
13428
+ },
13429
+ {
13430
+ "epoch": 0.3940774487471526,
13431
+ "grad_norm": 10.802031517028809,
13432
+ "learning_rate": 3.909062023156329e-08,
13433
+ "loss": 3.5163,
13434
+ "step": 1903
13435
+ },
13436
+ {
13437
+ "epoch": 0.3942845309587906,
13438
+ "grad_norm": 7.496048450469971,
13439
+ "learning_rate": 3.4627243020035616e-08,
13440
+ "loss": 3.9053,
13441
+ "step": 1904
13442
+ },
13443
+ {
13444
+ "epoch": 0.39449161317042863,
13445
+ "grad_norm": 8.064364433288574,
13446
+ "learning_rate": 3.0434313016780567e-08,
13447
+ "loss": 3.7754,
13448
+ "step": 1905
13449
+ },
13450
+ {
13451
+ "epoch": 0.3946986953820667,
13452
+ "grad_norm": 7.672548294067383,
13453
+ "learning_rate": 2.651184156539088e-08,
13454
+ "loss": 3.9151,
13455
+ "step": 1906
13456
+ },
13457
+ {
13458
+ "epoch": 0.3949057775937047,
13459
+ "grad_norm": 6.153156280517578,
13460
+ "learning_rate": 2.2859839277733497e-08,
13461
+ "loss": 3.0665,
13462
+ "step": 1907
13463
+ },
13464
+ {
13465
+ "epoch": 0.3951128598053427,
13466
+ "grad_norm": 7.630539417266846,
13467
+ "learning_rate": 1.9478316033993972e-08,
13468
+ "loss": 3.1406,
13469
+ "step": 1908
13470
+ },
13471
+ {
13472
+ "epoch": 0.39531994201698073,
13473
+ "grad_norm": 8.899959564208984,
13474
+ "learning_rate": 1.636728098256546e-08,
13475
+ "loss": 3.58,
13476
+ "step": 1909
13477
+ },
13478
+ {
13479
+ "epoch": 0.39552702422861874,
13480
+ "grad_norm": 7.002161026000977,
13481
+ "learning_rate": 1.3526742540070913e-08,
13482
+ "loss": 4.3189,
13483
+ "step": 1910
13484
+ },
13485
+ {
13486
+ "epoch": 0.3957341064402568,
13487
+ "grad_norm": 8.042828559875488,
13488
+ "learning_rate": 1.0956708391318681e-08,
13489
+ "loss": 3.0454,
13490
+ "step": 1911
13491
+ },
13492
+ {
13493
+ "epoch": 0.3959411886518948,
13494
+ "grad_norm": 10.092788696289062,
13495
+ "learning_rate": 8.657185489313601e-09,
13496
+ "loss": 4.2819,
13497
+ "step": 1912
13498
+ },
13499
+ {
13500
+ "epoch": 0.3961482708635328,
13501
+ "grad_norm": 6.035614013671875,
13502
+ "learning_rate": 6.628180055201494e-09,
13503
+ "loss": 4.1054,
13504
+ "step": 1913
13505
+ },
13506
+ {
13507
+ "epoch": 0.39635535307517084,
13508
+ "grad_norm": 7.719038009643555,
13509
+ "learning_rate": 4.869697578269161e-09,
13510
+ "loss": 3.5056,
13511
+ "step": 1914
13512
+ },
13513
+ {
13514
+ "epoch": 0.39656243528680885,
13515
+ "grad_norm": 8.446083068847656,
13516
+ "learning_rate": 3.3817428159443886e-09,
13517
+ "loss": 3.7232,
13518
+ "step": 1915
13519
+ },
13520
+ {
13521
+ "epoch": 0.3967695174984469,
13522
+ "grad_norm": 8.734418869018555,
13523
+ "learning_rate": 2.16431979372933e-09,
13524
+ "loss": 3.8401,
13525
+ "step": 1916
13526
+ },
13527
+ {
13528
+ "epoch": 0.3969765997100849,
13529
+ "grad_norm": 8.412437438964844,
13530
+ "learning_rate": 1.217431805267122e-09,
13531
+ "loss": 3.4617,
13532
+ "step": 1917
13533
+ },
13534
+ {
13535
+ "epoch": 0.39718368192172293,
13536
+ "grad_norm": 7.3742594718933105,
13537
+ "learning_rate": 5.410814122752684e-10,
13538
+ "loss": 3.4868,
13539
+ "step": 1918
13540
+ },
13541
+ {
13542
+ "epoch": 0.39739076413336094,
13543
+ "grad_norm": 8.769962310791016,
13544
+ "learning_rate": 1.3527044455674543e-10,
13545
+ "loss": 4.4384,
13546
+ "step": 1919
13547
+ },
13548
+ {
13549
+ "epoch": 0.39759784634499895,
13550
+ "grad_norm": 9.391153335571289,
13551
+ "learning_rate": 0.0,
13552
+ "loss": 3.1379,
13553
+ "step": 1920
13554
  }
13555
  ],
13556
  "logging_steps": 1,
 
13574
  "should_evaluate": false,
13575
  "should_log": false,
13576
  "should_save": true,
13577
+ "should_training_stop": true
13578
  },
13579
  "attributes": {}
13580
  }
13581
  },
13582
+ "total_flos": 2.2576652660559053e+18,
13583
  "train_batch_size": 2,
13584
  "trial_name": null,
13585
  "trial_params": null