{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 68.99953929788998,
"global_step": 46782,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.3331797659633281,
"learning_rate": 4.976190476190477e-05,
"loss": 2.685059944085315,
"step": 226
},
{
"epoch": 0.6663595319266562,
"learning_rate": 4.9523809523809525e-05,
"loss": 2.5215273156630253,
"step": 452
},
{
"epoch": 0.9995392978899843,
"learning_rate": 4.928571428571429e-05,
"loss": 2.449192013360758,
"step": 678
},
{
"epoch": 1.3331797659633282,
"learning_rate": 4.904761904761905e-05,
"loss": 2.3792454542311945,
"step": 904
},
{
"epoch": 1.6663595319266562,
"learning_rate": 4.880952380952381e-05,
"loss": 2.3559764760785398,
"step": 1130
},
{
"epoch": 1.9995392978899842,
"learning_rate": 4.8571428571428576e-05,
"loss": 2.328374001832135,
"step": 1356
},
{
"epoch": 2.333179765963328,
"learning_rate": 4.8333333333333334e-05,
"loss": 2.2863990986241705,
"step": 1582
},
{
"epoch": 2.6663595319266564,
"learning_rate": 4.80952380952381e-05,
"loss": 2.2656879256256914,
"step": 1808
},
{
"epoch": 2.999539297889984,
"learning_rate": 4.785714285714286e-05,
"loss": 2.245354711481955,
"step": 2034
},
{
"epoch": 3.333179765963328,
"learning_rate": 4.761904761904762e-05,
"loss": 2.2154734113575083,
"step": 2260
},
{
"epoch": 3.6663595319266564,
"learning_rate": 4.738095238095238e-05,
"loss": 2.19904536694552,
"step": 2486
},
{
"epoch": 3.999539297889984,
"learning_rate": 4.714285714285714e-05,
"loss": 2.185555112045423,
"step": 2712
},
{
"epoch": 4.333179765963328,
"learning_rate": 4.690476190476191e-05,
"loss": 2.162922816993916,
"step": 2938
},
{
"epoch": 4.666359531926656,
"learning_rate": 4.666666666666667e-05,
"loss": 2.1419683135716263,
"step": 3164
},
{
"epoch": 4.999539297889984,
"learning_rate": 4.642857142857143e-05,
"loss": 2.1468805633814987,
"step": 3390
},
{
"epoch": 5.333179765963328,
"learning_rate": 4.6190476190476194e-05,
"loss": 2.114357872346861,
"step": 3616
},
{
"epoch": 5.666359531926656,
"learning_rate": 4.595238095238095e-05,
"loss": 2.1005791858234235,
"step": 3842
},
{
"epoch": 5.999539297889984,
"learning_rate": 4.5714285714285716e-05,
"loss": 2.0986059408272264,
"step": 4068
},
{
"epoch": 6.333179765963328,
"learning_rate": 4.547619047619048e-05,
"loss": 2.063803073579231,
"step": 4294
},
{
"epoch": 6.666359531926656,
"learning_rate": 4.523809523809524e-05,
"loss": 2.0596065014864493,
"step": 4520
},
{
"epoch": 6.999539297889984,
"learning_rate": 4.5e-05,
"loss": 2.0634367276081997,
"step": 4746
},
{
"epoch": 7.333179765963328,
"learning_rate": 4.476190476190477e-05,
"loss": 2.049277921693515,
"step": 4972
},
{
"epoch": 7.666359531926656,
"learning_rate": 4.4523809523809525e-05,
"loss": 2.0317085738730642,
"step": 5198
},
{
"epoch": 7.999539297889984,
"learning_rate": 4.428571428571428e-05,
"loss": 2.0282083697023645,
"step": 5424
},
{
"epoch": 8.333179765963328,
"learning_rate": 4.404761904761905e-05,
"loss": 1.9894960116496128,
"step": 5650
},
{
"epoch": 8.666359531926656,
"learning_rate": 4.380952380952381e-05,
"loss": 1.996843321133504,
"step": 5876
},
{
"epoch": 8.999539297889985,
"learning_rate": 4.3571428571428576e-05,
"loss": 2.0045918152395603,
"step": 6102
},
{
"epoch": 9.333179765963328,
"learning_rate": 4.3333333333333334e-05,
"loss": 1.966206238333103,
"step": 6328
},
{
"epoch": 9.666359531926656,
"learning_rate": 4.30952380952381e-05,
"loss": 1.9711824940369191,
"step": 6554
},
{
"epoch": 9.999539297889985,
"learning_rate": 4.2857142857142856e-05,
"loss": 1.9805989223243916,
"step": 6780
},
{
"epoch": 10.333179765963328,
"learning_rate": 4.261904761904762e-05,
"loss": 1.9500477208500415,
"step": 7006
},
{
"epoch": 10.666359531926656,
"learning_rate": 4.2380952380952385e-05,
"loss": 1.9578322689090155,
"step": 7232
},
{
"epoch": 10.999539297889985,
"learning_rate": 4.214285714285714e-05,
"loss": 1.9484224572645878,
"step": 7458
},
{
"epoch": 11.333179765963328,
"learning_rate": 4.190476190476191e-05,
"loss": 1.9283131287161228,
"step": 7684
},
{
"epoch": 11.666359531926656,
"learning_rate": 4.166666666666667e-05,
"loss": 1.9156345603740321,
"step": 7910
},
{
"epoch": 11.999539297889985,
"learning_rate": 4.1428571428571437e-05,
"loss": 1.926576732534223,
"step": 8136
},
{
"epoch": 12.333179765963328,
"learning_rate": 4.119047619047619e-05,
"loss": 1.907513238687431,
"step": 8362
},
{
"epoch": 12.666359531926656,
"learning_rate": 4.095238095238095e-05,
"loss": 1.9078028856125553,
"step": 8588
},
{
"epoch": 12.999539297889985,
"learning_rate": 4.0714285714285717e-05,
"loss": 1.90263299182453,
"step": 8814
},
{
"epoch": 13.333179765963328,
"learning_rate": 4.047619047619048e-05,
"loss": 1.8844813794161366,
"step": 9040
},
{
"epoch": 13.666359531926656,
"learning_rate": 4.023809523809524e-05,
"loss": 1.8840125429946764,
"step": 9266
},
{
"epoch": 13.999539297889985,
"learning_rate": 4e-05,
"loss": 1.8717386701465708,
"step": 9492
},
{
"epoch": 14.333179765963328,
"learning_rate": 3.976190476190476e-05,
"loss": 1.858954910683421,
"step": 9718
},
{
"epoch": 14.666359531926656,
"learning_rate": 3.9523809523809526e-05,
"loss": 1.8676287895810288,
"step": 9944
},
{
"epoch": 14.999539297889985,
"learning_rate": 3.928571428571429e-05,
"loss": 1.8678895393304065,
"step": 10170
},
{
"epoch": 15.333179765963328,
"learning_rate": 3.904761904761905e-05,
"loss": 1.8486336227011892,
"step": 10396
},
{
"epoch": 15.666359531926656,
"learning_rate": 3.880952380952381e-05,
"loss": 1.8404835388723728,
"step": 10622
},
{
"epoch": 15.999539297889985,
"learning_rate": 3.857142857142858e-05,
"loss": 1.8481951688243226,
"step": 10848
},
{
"epoch": 16.33317976596333,
"learning_rate": 3.8333333333333334e-05,
"loss": 1.8267865476355087,
"step": 11074
},
{
"epoch": 16.666359531926656,
"learning_rate": 3.809523809523809e-05,
"loss": 1.818214213953609,
"step": 11300
},
{
"epoch": 16.999539297889985,
"learning_rate": 3.785714285714286e-05,
"loss": 1.8284451645032493,
"step": 11526
},
{
"epoch": 17.33317976596333,
"learning_rate": 3.761904761904762e-05,
"loss": 1.8039584539632882,
"step": 11752
},
{
"epoch": 17.666359531926656,
"learning_rate": 3.7380952380952386e-05,
"loss": 1.8007052261217507,
"step": 11978
},
{
"epoch": 17.999539297889985,
"learning_rate": 3.7142857142857143e-05,
"loss": 1.800786381274198,
"step": 12204
},
{
"epoch": 18.33317976596333,
"learning_rate": 3.690476190476191e-05,
"loss": 1.7864516266679342,
"step": 12430
},
{
"epoch": 18.666359531926656,
"learning_rate": 3.6666666666666666e-05,
"loss": 1.790079842626521,
"step": 12656
},
{
"epoch": 18.999539297889985,
"learning_rate": 3.642857142857143e-05,
"loss": 1.7918755185287611,
"step": 12882
},
{
"epoch": 19.33317976596333,
"learning_rate": 3.619047619047619e-05,
"loss": 1.7703251121318446,
"step": 13108
},
{
"epoch": 19.666359531926656,
"learning_rate": 3.595238095238095e-05,
"loss": 1.7819123394721377,
"step": 13334
},
{
"epoch": 19.999539297889985,
"learning_rate": 3.571428571428572e-05,
"loss": 1.7857482370022124,
"step": 13560
},
{
"epoch": 20.33317976596333,
"learning_rate": 3.547619047619048e-05,
"loss": 1.7561523977634126,
"step": 13786
},
{
"epoch": 20.666359531926656,
"learning_rate": 3.523809523809524e-05,
"loss": 1.7562858108925608,
"step": 14012
},
{
"epoch": 20.999539297889985,
"learning_rate": 3.5e-05,
"loss": 1.767443496569068,
"step": 14238
},
{
"epoch": 21.33317976596333,
"learning_rate": 3.476190476190476e-05,
"loss": 1.7421298406820382,
"step": 14464
},
{
"epoch": 21.666359531926656,
"learning_rate": 3.4523809523809526e-05,
"loss": 1.7460298791395878,
"step": 14690
},
{
"epoch": 21.999539297889985,
"learning_rate": 3.428571428571429e-05,
"loss": 1.7635893695122373,
"step": 14916
},
{
"epoch": 22.33317976596333,
"learning_rate": 3.404761904761905e-05,
"loss": 1.7318757993985066,
"step": 15142
},
{
"epoch": 22.666359531926656,
"learning_rate": 3.380952380952381e-05,
"loss": 1.7320329784292035,
"step": 15368
},
{
"epoch": 22.999539297889985,
"learning_rate": 3.357142857142857e-05,
"loss": 1.7314567903501799,
"step": 15594
},
{
"epoch": 23.33317976596333,
"learning_rate": 3.3333333333333335e-05,
"loss": 1.7091308863817063,
"step": 15820
},
{
"epoch": 23.666359531926656,
"learning_rate": 3.309523809523809e-05,
"loss": 1.7127776019341123,
"step": 16046
},
{
"epoch": 23.999539297889985,
"learning_rate": 3.285714285714286e-05,
"loss": 1.714874267578125,
"step": 16272
},
{
"epoch": 24.33317976596333,
"learning_rate": 3.261904761904762e-05,
"loss": 1.7092985980278623,
"step": 16498
},
{
"epoch": 24.666359531926656,
"learning_rate": 3.2380952380952386e-05,
"loss": 1.7022116331927544,
"step": 16724
},
{
"epoch": 24.999539297889985,
"learning_rate": 3.2142857142857144e-05,
"loss": 1.7083171743207273,
"step": 16950
},
{
"epoch": 25.33317976596333,
"learning_rate": 3.19047619047619e-05,
"loss": 1.6961614558127074,
"step": 17176
},
{
"epoch": 25.666359531926656,
"learning_rate": 3.1666666666666666e-05,
"loss": 1.6968883413129148,
"step": 17402
},
{
"epoch": 25.999539297889985,
"learning_rate": 3.142857142857143e-05,
"loss": 1.6995788844285813,
"step": 17628
},
{
"epoch": 26.33317976596333,
"learning_rate": 3.1190476190476195e-05,
"loss": 1.6844244762859513,
"step": 17854
},
{
"epoch": 26.666359531926656,
"learning_rate": 3.095238095238095e-05,
"loss": 1.6839947995886337,
"step": 18080
},
{
"epoch": 26.999539297889985,
"learning_rate": 3.071428571428572e-05,
"loss": 1.6868854623980227,
"step": 18306
},
{
"epoch": 27.33317976596333,
"learning_rate": 3.0476190476190482e-05,
"loss": 1.6642917869365321,
"step": 18532
},
{
"epoch": 27.666359531926656,
"learning_rate": 3.0238095238095236e-05,
"loss": 1.6676389981160122,
"step": 18758
},
{
"epoch": 27.999539297889985,
"learning_rate": 3e-05,
"loss": 1.6597493939695105,
"step": 18984
},
{
"epoch": 28.33317976596333,
"learning_rate": 2.9761904761904762e-05,
"loss": 1.652435842868501,
"step": 19210
},
{
"epoch": 28.666359531926656,
"learning_rate": 2.9523809523809526e-05,
"loss": 1.6600899485360205,
"step": 19436
},
{
"epoch": 28.999539297889985,
"learning_rate": 2.9285714285714288e-05,
"loss": 1.6490822851130393,
"step": 19662
},
{
"epoch": 29.33317976596333,
"learning_rate": 2.9047619047619052e-05,
"loss": 1.6511269628474143,
"step": 19888
},
{
"epoch": 29.666359531926656,
"learning_rate": 2.880952380952381e-05,
"loss": 1.6514149893701604,
"step": 20114
},
{
"epoch": 29.999539297889985,
"learning_rate": 2.857142857142857e-05,
"loss": 1.6452392308057937,
"step": 20340
},
{
"epoch": 30.33317976596333,
"learning_rate": 2.8333333333333335e-05,
"loss": 1.6428464366271434,
"step": 20566
},
{
"epoch": 30.666359531926656,
"learning_rate": 2.8095238095238096e-05,
"loss": 1.632520692538371,
"step": 20792
},
{
"epoch": 30.999539297889985,
"learning_rate": 2.785714285714286e-05,
"loss": 1.6398790747718472,
"step": 21018
},
{
"epoch": 31.33317976596333,
"learning_rate": 2.7619047619047622e-05,
"loss": 1.6239518697282909,
"step": 21244
},
{
"epoch": 31.666359531926656,
"learning_rate": 2.7380952380952383e-05,
"loss": 1.6242760852374862,
"step": 21470
},
{
"epoch": 31.999539297889985,
"learning_rate": 2.714285714285714e-05,
"loss": 1.627472463962251,
"step": 21696
},
{
"epoch": 32.33317976596333,
"learning_rate": 2.6904761904761905e-05,
"loss": 1.6053812482715708,
"step": 21922
},
{
"epoch": 32.66635953192666,
"learning_rate": 2.6666666666666667e-05,
"loss": 1.610074878793902,
"step": 22148
},
{
"epoch": 32.99953929788998,
"learning_rate": 2.642857142857143e-05,
"loss": 1.620673390616358,
"step": 22374
},
{
"epoch": 33.33317976596333,
"learning_rate": 2.6190476190476192e-05,
"loss": 1.6081807634471792,
"step": 22600
},
{
"epoch": 33.66635953192666,
"learning_rate": 2.5952380952380957e-05,
"loss": 1.6156560847189574,
"step": 22826
},
{
"epoch": 33.99953929788998,
"learning_rate": 2.5714285714285714e-05,
"loss": 1.6041984895689299,
"step": 23052
},
{
"epoch": 34.33317976596333,
"learning_rate": 2.5476190476190476e-05,
"loss": 1.5947894881256914,
"step": 23278
},
{
"epoch": 34.66635953192666,
"learning_rate": 2.523809523809524e-05,
"loss": 1.5988714167502074,
"step": 23504
},
{
"epoch": 34.99953929788998,
"learning_rate": 2.5e-05,
"loss": 1.5926924173810841,
"step": 23730
},
{
"epoch": 35.33317976596333,
"learning_rate": 2.4761904761904762e-05,
"loss": 1.5834472116115874,
"step": 23956
},
{
"epoch": 35.66635953192666,
"learning_rate": 2.4523809523809523e-05,
"loss": 1.5848133458500415,
"step": 24182
},
{
"epoch": 35.99953929788998,
"learning_rate": 2.4285714285714288e-05,
"loss": 1.602300593283324,
"step": 24408
},
{
"epoch": 36.33317976596333,
"learning_rate": 2.404761904761905e-05,
"loss": 1.5835503772296737,
"step": 24634
},
{
"epoch": 36.66635953192666,
"learning_rate": 2.380952380952381e-05,
"loss": 1.5831868669628042,
"step": 24860
},
{
"epoch": 36.99953929788998,
"learning_rate": 2.357142857142857e-05,
"loss": 1.5691194787489628,
"step": 25086
},
{
"epoch": 37.33317976596333,
"learning_rate": 2.3333333333333336e-05,
"loss": 1.568113208872027,
"step": 25312
},
{
"epoch": 37.66635953192666,
"learning_rate": 2.3095238095238097e-05,
"loss": 1.5696247742239353,
"step": 25538
},
{
"epoch": 37.99953929788998,
"learning_rate": 2.2857142857142858e-05,
"loss": 1.5706803313398783,
"step": 25764
},
{
"epoch": 38.33317976596333,
"learning_rate": 2.261904761904762e-05,
"loss": 1.5539683114110896,
"step": 25990
},
{
"epoch": 38.66635953192666,
"learning_rate": 2.2380952380952384e-05,
"loss": 1.5682053017405282,
"step": 26216
},
{
"epoch": 38.99953929788998,
"learning_rate": 2.214285714285714e-05,
"loss": 1.5620073976769913,
"step": 26442
},
{
"epoch": 39.33317976596333,
"learning_rate": 2.1904761904761906e-05,
"loss": 1.5591868172704646,
"step": 26668
},
{
"epoch": 39.66635953192666,
"learning_rate": 2.1666666666666667e-05,
"loss": 1.5566102424554065,
"step": 26894
},
{
"epoch": 39.99953929788998,
"learning_rate": 2.1428571428571428e-05,
"loss": 1.5528145849177268,
"step": 27120
},
{
"epoch": 40.33317976596333,
"learning_rate": 2.1190476190476193e-05,
"loss": 1.5426600633469303,
"step": 27346
},
{
"epoch": 40.66635953192666,
"learning_rate": 2.0952380952380954e-05,
"loss": 1.541890642284292,
"step": 27572
},
{
"epoch": 40.99953929788998,
"learning_rate": 2.0714285714285718e-05,
"loss": 1.5517368823026134,
"step": 27798
},
{
"epoch": 41.33317976596333,
"learning_rate": 2.0476190476190476e-05,
"loss": 1.540764057530766,
"step": 28024
},
{
"epoch": 41.66635953192666,
"learning_rate": 2.023809523809524e-05,
"loss": 1.5214318469562362,
"step": 28250
},
{
"epoch": 41.99953929788998,
"learning_rate": 2e-05,
"loss": 1.5345232128042035,
"step": 28476
},
{
"epoch": 42.33317976596333,
"learning_rate": 1.9761904761904763e-05,
"loss": 1.5242220448181691,
"step": 28702
},
{
"epoch": 42.66635953192666,
"learning_rate": 1.9523809523809524e-05,
"loss": 1.535507067114906,
"step": 28928
},
{
"epoch": 42.99953929788998,
"learning_rate": 1.928571428571429e-05,
"loss": 1.5329083468006774,
"step": 29154
},
{
"epoch": 43.33317976596333,
"learning_rate": 1.9047619047619046e-05,
"loss": 1.5211832502246958,
"step": 29380
},
{
"epoch": 43.66635953192666,
"learning_rate": 1.880952380952381e-05,
"loss": 1.5096026395274476,
"step": 29606
},
{
"epoch": 43.99953929788998,
"learning_rate": 1.8571428571428572e-05,
"loss": 1.5239440107767561,
"step": 29832
},
{
"epoch": 44.33317976596333,
"learning_rate": 1.8333333333333333e-05,
"loss": 1.518264635474281,
"step": 30058
},
{
"epoch": 44.66635953192666,
"learning_rate": 1.8095238095238094e-05,
"loss": 1.5015280394427544,
"step": 30284
},
{
"epoch": 44.99953929788998,
"learning_rate": 1.785714285714286e-05,
"loss": 1.5198267033669801,
"step": 30510
},
{
"epoch": 45.33317976596333,
"learning_rate": 1.761904761904762e-05,
"loss": 1.5029577744745575,
"step": 30736
},
{
"epoch": 45.66635953192666,
"learning_rate": 1.738095238095238e-05,
"loss": 1.5046313800642976,
"step": 30962
},
{
"epoch": 45.99953929788998,
"learning_rate": 1.7142857142857145e-05,
"loss": 1.5148508527637583,
"step": 31188
},
{
"epoch": 46.33317976596333,
"learning_rate": 1.6904761904761906e-05,
"loss": 1.4938382849229122,
"step": 31414
},
{
"epoch": 46.66635953192666,
"learning_rate": 1.6666666666666667e-05,
"loss": 1.5032175789892146,
"step": 31640
},
{
"epoch": 46.99953929788998,
"learning_rate": 1.642857142857143e-05,
"loss": 1.4958131503214878,
"step": 31866
},
{
"epoch": 47.33317976596333,
"learning_rate": 1.6190476190476193e-05,
"loss": 1.5038191533721654,
"step": 32092
},
{
"epoch": 47.66635953192666,
"learning_rate": 1.595238095238095e-05,
"loss": 1.4897128755012445,
"step": 32318
},
{
"epoch": 47.99953929788998,
"learning_rate": 1.5714285714285715e-05,
"loss": 1.4913623100888413,
"step": 32544
},
{
"epoch": 48.33317976596333,
"learning_rate": 1.5476190476190476e-05,
"loss": 1.491287096411781,
"step": 32770
},
{
"epoch": 48.66635953192666,
"learning_rate": 1.5238095238095241e-05,
"loss": 1.4894442938070382,
"step": 32996
},
{
"epoch": 48.99953929788998,
"learning_rate": 1.5e-05,
"loss": 1.4941079468853706,
"step": 33222
},
{
"epoch": 49.33317976596333,
"learning_rate": 1.4761904761904763e-05,
"loss": 1.494901538950152,
"step": 33448
},
{
"epoch": 49.66635953192666,
"learning_rate": 1.4523809523809526e-05,
"loss": 1.4872477033496958,
"step": 33674
},
{
"epoch": 49.99953929788998,
"learning_rate": 1.4285714285714285e-05,
"loss": 1.4863664745229535,
"step": 33900
},
{
"epoch": 50.33317976596333,
"learning_rate": 1.4047619047619048e-05,
"loss": 1.4825258508192753,
"step": 34126
},
{
"epoch": 50.66635953192666,
"learning_rate": 1.3809523809523811e-05,
"loss": 1.4887811441337113,
"step": 34352
},
{
"epoch": 50.99953929788998,
"learning_rate": 1.357142857142857e-05,
"loss": 1.475349223719234,
"step": 34578
},
{
"epoch": 51.33317976596333,
"learning_rate": 1.3333333333333333e-05,
"loss": 1.468778492075152,
"step": 34804
},
{
"epoch": 51.66635953192666,
"learning_rate": 1.3095238095238096e-05,
"loss": 1.4681135869659154,
"step": 35030
},
{
"epoch": 51.99953929788998,
"learning_rate": 1.2857142857142857e-05,
"loss": 1.4756152397763413,
"step": 35256
},
{
"epoch": 52.33317976596333,
"learning_rate": 1.261904761904762e-05,
"loss": 1.461721504684043,
"step": 35482
},
{
"epoch": 52.66635953192666,
"learning_rate": 1.2380952380952381e-05,
"loss": 1.472177421097207,
"step": 35708
},
{
"epoch": 52.99953929788998,
"learning_rate": 1.2142857142857144e-05,
"loss": 1.470105331555932,
"step": 35934
},
{
"epoch": 53.33317976596333,
"learning_rate": 1.1904761904761905e-05,
"loss": 1.463019582022608,
"step": 36160
},
{
"epoch": 53.66635953192666,
"learning_rate": 1.1666666666666668e-05,
"loss": 1.4712693205976908,
"step": 36386
},
{
"epoch": 53.99953929788998,
"learning_rate": 1.1428571428571429e-05,
"loss": 1.4639480725853844,
"step": 36612
},
{
"epoch": 54.33317976596333,
"learning_rate": 1.1190476190476192e-05,
"loss": 1.4621197185685149,
"step": 36838
},
{
"epoch": 54.66635953192666,
"learning_rate": 1.0952380952380953e-05,
"loss": 1.4438346930309736,
"step": 37064
},
{
"epoch": 54.99953929788998,
"learning_rate": 1.0714285714285714e-05,
"loss": 1.45255799420112,
"step": 37290
},
{
"epoch": 55.33317976596333,
"learning_rate": 1.0476190476190477e-05,
"loss": 1.4527645955043555,
"step": 37516
},
{
"epoch": 55.66635953192666,
"learning_rate": 1.0238095238095238e-05,
"loss": 1.4556512073077987,
"step": 37742
},
{
"epoch": 55.99953929788998,
"learning_rate": 1e-05,
"loss": 1.4523168243138136,
"step": 37968
},
{
"epoch": 56.33317976596333,
"learning_rate": 9.761904761904762e-06,
"loss": 1.4451588318411228,
"step": 38194
},
{
"epoch": 56.66635953192666,
"learning_rate": 9.523809523809523e-06,
"loss": 1.4351428546736726,
"step": 38420
},
{
"epoch": 56.99953929788998,
"learning_rate": 9.285714285714286e-06,
"loss": 1.4480798771951051,
"step": 38646
},
{
"epoch": 57.33317976596333,
"learning_rate": 9.047619047619047e-06,
"loss": 1.4419262641299087,
"step": 38872
},
{
"epoch": 57.66635953192666,
"learning_rate": 8.80952380952381e-06,
"loss": 1.4400379552250415,
"step": 39098
},
{
"epoch": 57.99953929788998,
"learning_rate": 8.571428571428573e-06,
"loss": 1.4458791006982854,
"step": 39324
},
{
"epoch": 58.33317976596333,
"learning_rate": 8.333333333333334e-06,
"loss": 1.4246890987970133,
"step": 39550
},
{
"epoch": 58.66635953192666,
"learning_rate": 8.095238095238097e-06,
"loss": 1.4372091377730918,
"step": 39776
},
{
"epoch": 58.99953929788998,
"learning_rate": 7.857142857142858e-06,
"loss": 1.4388618131654451,
"step": 40002
},
{
"epoch": 59.33317976596333,
"learning_rate": 7.6190476190476205e-06,
"loss": 1.437955738168902,
"step": 40228
},
{
"epoch": 59.66635953192666,
"learning_rate": 7.380952380952382e-06,
"loss": 1.4384045896276962,
"step": 40454
},
{
"epoch": 59.99953929788998,
"learning_rate": 7.142857142857143e-06,
"loss": 1.4317560786694552,
"step": 40680
},
{
"epoch": 60.33317976596333,
"learning_rate": 6.9047619047619055e-06,
"loss": 1.4312950741928236,
"step": 40906
},
{
"epoch": 60.66635953192666,
"learning_rate": 6.666666666666667e-06,
"loss": 1.4349378737728153,
"step": 41132
},
{
"epoch": 60.99953929788998,
"learning_rate": 6.428571428571429e-06,
"loss": 1.4232025146484375,
"step": 41358
},
{
"epoch": 61.33317976596333,
"learning_rate": 6.190476190476191e-06,
"loss": 1.4273036180344303,
"step": 41584
},
{
"epoch": 61.66635953192666,
"learning_rate": 5.9523809523809525e-06,
"loss": 1.437505671408324,
"step": 41810
},
{
"epoch": 61.99953929788998,
"learning_rate": 5.7142857142857145e-06,
"loss": 1.4295697507605087,
"step": 42036
},
{
"epoch": 62.33317976596333,
"learning_rate": 5.4761904761904765e-06,
"loss": 1.4301999522521434,
"step": 42262
},
{
"epoch": 62.66635953192666,
"learning_rate": 5.2380952380952384e-06,
"loss": 1.4240087998651825,
"step": 42488
},
{
"epoch": 62.99953929788998,
"learning_rate": 5e-06,
"loss": 1.418294991012168,
"step": 42714
},
{
"epoch": 63.33317976596333,
"learning_rate": 4.7619047619047615e-06,
"loss": 1.4275296641662059,
"step": 42940
},
{
"epoch": 63.66635953192666,
"learning_rate": 4.5238095238095235e-06,
"loss": 1.4242185069396434,
"step": 43166
},
{
"epoch": 63.99953929788998,
"learning_rate": 4.285714285714286e-06,
"loss": 1.4242925053149198,
"step": 43392
},
{
"epoch": 64.33317976596332,
"learning_rate": 4.047619047619048e-06,
"loss": 1.4117900206979397,
"step": 43618
},
{
"epoch": 64.66635953192666,
"learning_rate": 3.8095238095238102e-06,
"loss": 1.4196827306156665,
"step": 43844
},
{
"epoch": 64.99953929788998,
"learning_rate": 3.5714285714285714e-06,
"loss": 1.4127752253439574,
"step": 44070
},
{
"epoch": 65.33317976596332,
"learning_rate": 3.3333333333333333e-06,
"loss": 1.415105228930448,
"step": 44296
},
{
"epoch": 65.66635953192666,
"learning_rate": 3.0952380952380953e-06,
"loss": 1.4221684278640072,
"step": 44522
},
{
"epoch": 65.99953929788998,
"learning_rate": 2.8571428571428573e-06,
"loss": 1.4126794865701051,
"step": 44748
},
{
"epoch": 66.33317976596332,
"learning_rate": 2.6190476190476192e-06,
"loss": 1.412820731644082,
"step": 44974
},
{
"epoch": 66.66635953192666,
"learning_rate": 2.3809523809523808e-06,
"loss": 1.4091276995903623,
"step": 45200
},
{
"epoch": 66.99953929788998,
"learning_rate": 2.142857142857143e-06,
"loss": 1.413559094994469,
"step": 45426
},
{
"epoch": 67.33317976596332,
"learning_rate": 1.9047619047619051e-06,
"loss": 1.4078961937828403,
"step": 45652
},
{
"epoch": 67.66635953192666,
"learning_rate": 1.6666666666666667e-06,
"loss": 1.4104151092799364,
"step": 45878
},
{
"epoch": 67.99953929788998,
"learning_rate": 1.4285714285714286e-06,
"loss": 1.4099604513792865,
"step": 46104
},
{
"epoch": 68.33317976596332,
"learning_rate": 1.1904761904761904e-06,
"loss": 1.406501297402171,
"step": 46330
},
{
"epoch": 68.66635953192666,
"learning_rate": 9.523809523809526e-07,
"loss": 1.4021470061445658,
"step": 46556
},
{
"epoch": 68.99953929788998,
"learning_rate": 7.142857142857143e-07,
"loss": 1.4063275092470962,
"step": 46782
}
],
"max_steps": 47460,
"num_train_epochs": 70,
"total_flos": 531450425497308624,
"trial_name": null,
"trial_params": null
}