|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 68.99953929788998, |
|
"global_step": 46782, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.3331797659633281, |
|
"learning_rate": 4.976190476190477e-05, |
|
"loss": 2.685059944085315, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.6663595319266562, |
|
"learning_rate": 4.9523809523809525e-05, |
|
"loss": 2.5215273156630253, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.9995392978899843, |
|
"learning_rate": 4.928571428571429e-05, |
|
"loss": 2.449192013360758, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 1.3331797659633282, |
|
"learning_rate": 4.904761904761905e-05, |
|
"loss": 2.3792454542311945, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 1.6663595319266562, |
|
"learning_rate": 4.880952380952381e-05, |
|
"loss": 2.3559764760785398, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.9995392978899842, |
|
"learning_rate": 4.8571428571428576e-05, |
|
"loss": 2.328374001832135, |
|
"step": 1356 |
|
}, |
|
{ |
|
"epoch": 2.333179765963328, |
|
"learning_rate": 4.8333333333333334e-05, |
|
"loss": 2.2863990986241705, |
|
"step": 1582 |
|
}, |
|
{ |
|
"epoch": 2.6663595319266564, |
|
"learning_rate": 4.80952380952381e-05, |
|
"loss": 2.2656879256256914, |
|
"step": 1808 |
|
}, |
|
{ |
|
"epoch": 2.999539297889984, |
|
"learning_rate": 4.785714285714286e-05, |
|
"loss": 2.245354711481955, |
|
"step": 2034 |
|
}, |
|
{ |
|
"epoch": 3.333179765963328, |
|
"learning_rate": 4.761904761904762e-05, |
|
"loss": 2.2154734113575083, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 3.6663595319266564, |
|
"learning_rate": 4.738095238095238e-05, |
|
"loss": 2.19904536694552, |
|
"step": 2486 |
|
}, |
|
{ |
|
"epoch": 3.999539297889984, |
|
"learning_rate": 4.714285714285714e-05, |
|
"loss": 2.185555112045423, |
|
"step": 2712 |
|
}, |
|
{ |
|
"epoch": 4.333179765963328, |
|
"learning_rate": 4.690476190476191e-05, |
|
"loss": 2.162922816993916, |
|
"step": 2938 |
|
}, |
|
{ |
|
"epoch": 4.666359531926656, |
|
"learning_rate": 4.666666666666667e-05, |
|
"loss": 2.1419683135716263, |
|
"step": 3164 |
|
}, |
|
{ |
|
"epoch": 4.999539297889984, |
|
"learning_rate": 4.642857142857143e-05, |
|
"loss": 2.1468805633814987, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 5.333179765963328, |
|
"learning_rate": 4.6190476190476194e-05, |
|
"loss": 2.114357872346861, |
|
"step": 3616 |
|
}, |
|
{ |
|
"epoch": 5.666359531926656, |
|
"learning_rate": 4.595238095238095e-05, |
|
"loss": 2.1005791858234235, |
|
"step": 3842 |
|
}, |
|
{ |
|
"epoch": 5.999539297889984, |
|
"learning_rate": 4.5714285714285716e-05, |
|
"loss": 2.0986059408272264, |
|
"step": 4068 |
|
}, |
|
{ |
|
"epoch": 6.333179765963328, |
|
"learning_rate": 4.547619047619048e-05, |
|
"loss": 2.063803073579231, |
|
"step": 4294 |
|
}, |
|
{ |
|
"epoch": 6.666359531926656, |
|
"learning_rate": 4.523809523809524e-05, |
|
"loss": 2.0596065014864493, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 6.999539297889984, |
|
"learning_rate": 4.5e-05, |
|
"loss": 2.0634367276081997, |
|
"step": 4746 |
|
}, |
|
{ |
|
"epoch": 7.333179765963328, |
|
"learning_rate": 4.476190476190477e-05, |
|
"loss": 2.049277921693515, |
|
"step": 4972 |
|
}, |
|
{ |
|
"epoch": 7.666359531926656, |
|
"learning_rate": 4.4523809523809525e-05, |
|
"loss": 2.0317085738730642, |
|
"step": 5198 |
|
}, |
|
{ |
|
"epoch": 7.999539297889984, |
|
"learning_rate": 4.428571428571428e-05, |
|
"loss": 2.0282083697023645, |
|
"step": 5424 |
|
}, |
|
{ |
|
"epoch": 8.333179765963328, |
|
"learning_rate": 4.404761904761905e-05, |
|
"loss": 1.9894960116496128, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 8.666359531926656, |
|
"learning_rate": 4.380952380952381e-05, |
|
"loss": 1.996843321133504, |
|
"step": 5876 |
|
}, |
|
{ |
|
"epoch": 8.999539297889985, |
|
"learning_rate": 4.3571428571428576e-05, |
|
"loss": 2.0045918152395603, |
|
"step": 6102 |
|
}, |
|
{ |
|
"epoch": 9.333179765963328, |
|
"learning_rate": 4.3333333333333334e-05, |
|
"loss": 1.966206238333103, |
|
"step": 6328 |
|
}, |
|
{ |
|
"epoch": 9.666359531926656, |
|
"learning_rate": 4.30952380952381e-05, |
|
"loss": 1.9711824940369191, |
|
"step": 6554 |
|
}, |
|
{ |
|
"epoch": 9.999539297889985, |
|
"learning_rate": 4.2857142857142856e-05, |
|
"loss": 1.9805989223243916, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 10.333179765963328, |
|
"learning_rate": 4.261904761904762e-05, |
|
"loss": 1.9500477208500415, |
|
"step": 7006 |
|
}, |
|
{ |
|
"epoch": 10.666359531926656, |
|
"learning_rate": 4.2380952380952385e-05, |
|
"loss": 1.9578322689090155, |
|
"step": 7232 |
|
}, |
|
{ |
|
"epoch": 10.999539297889985, |
|
"learning_rate": 4.214285714285714e-05, |
|
"loss": 1.9484224572645878, |
|
"step": 7458 |
|
}, |
|
{ |
|
"epoch": 11.333179765963328, |
|
"learning_rate": 4.190476190476191e-05, |
|
"loss": 1.9283131287161228, |
|
"step": 7684 |
|
}, |
|
{ |
|
"epoch": 11.666359531926656, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 1.9156345603740321, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 11.999539297889985, |
|
"learning_rate": 4.1428571428571437e-05, |
|
"loss": 1.926576732534223, |
|
"step": 8136 |
|
}, |
|
{ |
|
"epoch": 12.333179765963328, |
|
"learning_rate": 4.119047619047619e-05, |
|
"loss": 1.907513238687431, |
|
"step": 8362 |
|
}, |
|
{ |
|
"epoch": 12.666359531926656, |
|
"learning_rate": 4.095238095238095e-05, |
|
"loss": 1.9078028856125553, |
|
"step": 8588 |
|
}, |
|
{ |
|
"epoch": 12.999539297889985, |
|
"learning_rate": 4.0714285714285717e-05, |
|
"loss": 1.90263299182453, |
|
"step": 8814 |
|
}, |
|
{ |
|
"epoch": 13.333179765963328, |
|
"learning_rate": 4.047619047619048e-05, |
|
"loss": 1.8844813794161366, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 13.666359531926656, |
|
"learning_rate": 4.023809523809524e-05, |
|
"loss": 1.8840125429946764, |
|
"step": 9266 |
|
}, |
|
{ |
|
"epoch": 13.999539297889985, |
|
"learning_rate": 4e-05, |
|
"loss": 1.8717386701465708, |
|
"step": 9492 |
|
}, |
|
{ |
|
"epoch": 14.333179765963328, |
|
"learning_rate": 3.976190476190476e-05, |
|
"loss": 1.858954910683421, |
|
"step": 9718 |
|
}, |
|
{ |
|
"epoch": 14.666359531926656, |
|
"learning_rate": 3.9523809523809526e-05, |
|
"loss": 1.8676287895810288, |
|
"step": 9944 |
|
}, |
|
{ |
|
"epoch": 14.999539297889985, |
|
"learning_rate": 3.928571428571429e-05, |
|
"loss": 1.8678895393304065, |
|
"step": 10170 |
|
}, |
|
{ |
|
"epoch": 15.333179765963328, |
|
"learning_rate": 3.904761904761905e-05, |
|
"loss": 1.8486336227011892, |
|
"step": 10396 |
|
}, |
|
{ |
|
"epoch": 15.666359531926656, |
|
"learning_rate": 3.880952380952381e-05, |
|
"loss": 1.8404835388723728, |
|
"step": 10622 |
|
}, |
|
{ |
|
"epoch": 15.999539297889985, |
|
"learning_rate": 3.857142857142858e-05, |
|
"loss": 1.8481951688243226, |
|
"step": 10848 |
|
}, |
|
{ |
|
"epoch": 16.33317976596333, |
|
"learning_rate": 3.8333333333333334e-05, |
|
"loss": 1.8267865476355087, |
|
"step": 11074 |
|
}, |
|
{ |
|
"epoch": 16.666359531926656, |
|
"learning_rate": 3.809523809523809e-05, |
|
"loss": 1.818214213953609, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 16.999539297889985, |
|
"learning_rate": 3.785714285714286e-05, |
|
"loss": 1.8284451645032493, |
|
"step": 11526 |
|
}, |
|
{ |
|
"epoch": 17.33317976596333, |
|
"learning_rate": 3.761904761904762e-05, |
|
"loss": 1.8039584539632882, |
|
"step": 11752 |
|
}, |
|
{ |
|
"epoch": 17.666359531926656, |
|
"learning_rate": 3.7380952380952386e-05, |
|
"loss": 1.8007052261217507, |
|
"step": 11978 |
|
}, |
|
{ |
|
"epoch": 17.999539297889985, |
|
"learning_rate": 3.7142857142857143e-05, |
|
"loss": 1.800786381274198, |
|
"step": 12204 |
|
}, |
|
{ |
|
"epoch": 18.33317976596333, |
|
"learning_rate": 3.690476190476191e-05, |
|
"loss": 1.7864516266679342, |
|
"step": 12430 |
|
}, |
|
{ |
|
"epoch": 18.666359531926656, |
|
"learning_rate": 3.6666666666666666e-05, |
|
"loss": 1.790079842626521, |
|
"step": 12656 |
|
}, |
|
{ |
|
"epoch": 18.999539297889985, |
|
"learning_rate": 3.642857142857143e-05, |
|
"loss": 1.7918755185287611, |
|
"step": 12882 |
|
}, |
|
{ |
|
"epoch": 19.33317976596333, |
|
"learning_rate": 3.619047619047619e-05, |
|
"loss": 1.7703251121318446, |
|
"step": 13108 |
|
}, |
|
{ |
|
"epoch": 19.666359531926656, |
|
"learning_rate": 3.595238095238095e-05, |
|
"loss": 1.7819123394721377, |
|
"step": 13334 |
|
}, |
|
{ |
|
"epoch": 19.999539297889985, |
|
"learning_rate": 3.571428571428572e-05, |
|
"loss": 1.7857482370022124, |
|
"step": 13560 |
|
}, |
|
{ |
|
"epoch": 20.33317976596333, |
|
"learning_rate": 3.547619047619048e-05, |
|
"loss": 1.7561523977634126, |
|
"step": 13786 |
|
}, |
|
{ |
|
"epoch": 20.666359531926656, |
|
"learning_rate": 3.523809523809524e-05, |
|
"loss": 1.7562858108925608, |
|
"step": 14012 |
|
}, |
|
{ |
|
"epoch": 20.999539297889985, |
|
"learning_rate": 3.5e-05, |
|
"loss": 1.767443496569068, |
|
"step": 14238 |
|
}, |
|
{ |
|
"epoch": 21.33317976596333, |
|
"learning_rate": 3.476190476190476e-05, |
|
"loss": 1.7421298406820382, |
|
"step": 14464 |
|
}, |
|
{ |
|
"epoch": 21.666359531926656, |
|
"learning_rate": 3.4523809523809526e-05, |
|
"loss": 1.7460298791395878, |
|
"step": 14690 |
|
}, |
|
{ |
|
"epoch": 21.999539297889985, |
|
"learning_rate": 3.428571428571429e-05, |
|
"loss": 1.7635893695122373, |
|
"step": 14916 |
|
}, |
|
{ |
|
"epoch": 22.33317976596333, |
|
"learning_rate": 3.404761904761905e-05, |
|
"loss": 1.7318757993985066, |
|
"step": 15142 |
|
}, |
|
{ |
|
"epoch": 22.666359531926656, |
|
"learning_rate": 3.380952380952381e-05, |
|
"loss": 1.7320329784292035, |
|
"step": 15368 |
|
}, |
|
{ |
|
"epoch": 22.999539297889985, |
|
"learning_rate": 3.357142857142857e-05, |
|
"loss": 1.7314567903501799, |
|
"step": 15594 |
|
}, |
|
{ |
|
"epoch": 23.33317976596333, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 1.7091308863817063, |
|
"step": 15820 |
|
}, |
|
{ |
|
"epoch": 23.666359531926656, |
|
"learning_rate": 3.309523809523809e-05, |
|
"loss": 1.7127776019341123, |
|
"step": 16046 |
|
}, |
|
{ |
|
"epoch": 23.999539297889985, |
|
"learning_rate": 3.285714285714286e-05, |
|
"loss": 1.714874267578125, |
|
"step": 16272 |
|
}, |
|
{ |
|
"epoch": 24.33317976596333, |
|
"learning_rate": 3.261904761904762e-05, |
|
"loss": 1.7092985980278623, |
|
"step": 16498 |
|
}, |
|
{ |
|
"epoch": 24.666359531926656, |
|
"learning_rate": 3.2380952380952386e-05, |
|
"loss": 1.7022116331927544, |
|
"step": 16724 |
|
}, |
|
{ |
|
"epoch": 24.999539297889985, |
|
"learning_rate": 3.2142857142857144e-05, |
|
"loss": 1.7083171743207273, |
|
"step": 16950 |
|
}, |
|
{ |
|
"epoch": 25.33317976596333, |
|
"learning_rate": 3.19047619047619e-05, |
|
"loss": 1.6961614558127074, |
|
"step": 17176 |
|
}, |
|
{ |
|
"epoch": 25.666359531926656, |
|
"learning_rate": 3.1666666666666666e-05, |
|
"loss": 1.6968883413129148, |
|
"step": 17402 |
|
}, |
|
{ |
|
"epoch": 25.999539297889985, |
|
"learning_rate": 3.142857142857143e-05, |
|
"loss": 1.6995788844285813, |
|
"step": 17628 |
|
}, |
|
{ |
|
"epoch": 26.33317976596333, |
|
"learning_rate": 3.1190476190476195e-05, |
|
"loss": 1.6844244762859513, |
|
"step": 17854 |
|
}, |
|
{ |
|
"epoch": 26.666359531926656, |
|
"learning_rate": 3.095238095238095e-05, |
|
"loss": 1.6839947995886337, |
|
"step": 18080 |
|
}, |
|
{ |
|
"epoch": 26.999539297889985, |
|
"learning_rate": 3.071428571428572e-05, |
|
"loss": 1.6868854623980227, |
|
"step": 18306 |
|
}, |
|
{ |
|
"epoch": 27.33317976596333, |
|
"learning_rate": 3.0476190476190482e-05, |
|
"loss": 1.6642917869365321, |
|
"step": 18532 |
|
}, |
|
{ |
|
"epoch": 27.666359531926656, |
|
"learning_rate": 3.0238095238095236e-05, |
|
"loss": 1.6676389981160122, |
|
"step": 18758 |
|
}, |
|
{ |
|
"epoch": 27.999539297889985, |
|
"learning_rate": 3e-05, |
|
"loss": 1.6597493939695105, |
|
"step": 18984 |
|
}, |
|
{ |
|
"epoch": 28.33317976596333, |
|
"learning_rate": 2.9761904761904762e-05, |
|
"loss": 1.652435842868501, |
|
"step": 19210 |
|
}, |
|
{ |
|
"epoch": 28.666359531926656, |
|
"learning_rate": 2.9523809523809526e-05, |
|
"loss": 1.6600899485360205, |
|
"step": 19436 |
|
}, |
|
{ |
|
"epoch": 28.999539297889985, |
|
"learning_rate": 2.9285714285714288e-05, |
|
"loss": 1.6490822851130393, |
|
"step": 19662 |
|
}, |
|
{ |
|
"epoch": 29.33317976596333, |
|
"learning_rate": 2.9047619047619052e-05, |
|
"loss": 1.6511269628474143, |
|
"step": 19888 |
|
}, |
|
{ |
|
"epoch": 29.666359531926656, |
|
"learning_rate": 2.880952380952381e-05, |
|
"loss": 1.6514149893701604, |
|
"step": 20114 |
|
}, |
|
{ |
|
"epoch": 29.999539297889985, |
|
"learning_rate": 2.857142857142857e-05, |
|
"loss": 1.6452392308057937, |
|
"step": 20340 |
|
}, |
|
{ |
|
"epoch": 30.33317976596333, |
|
"learning_rate": 2.8333333333333335e-05, |
|
"loss": 1.6428464366271434, |
|
"step": 20566 |
|
}, |
|
{ |
|
"epoch": 30.666359531926656, |
|
"learning_rate": 2.8095238095238096e-05, |
|
"loss": 1.632520692538371, |
|
"step": 20792 |
|
}, |
|
{ |
|
"epoch": 30.999539297889985, |
|
"learning_rate": 2.785714285714286e-05, |
|
"loss": 1.6398790747718472, |
|
"step": 21018 |
|
}, |
|
{ |
|
"epoch": 31.33317976596333, |
|
"learning_rate": 2.7619047619047622e-05, |
|
"loss": 1.6239518697282909, |
|
"step": 21244 |
|
}, |
|
{ |
|
"epoch": 31.666359531926656, |
|
"learning_rate": 2.7380952380952383e-05, |
|
"loss": 1.6242760852374862, |
|
"step": 21470 |
|
}, |
|
{ |
|
"epoch": 31.999539297889985, |
|
"learning_rate": 2.714285714285714e-05, |
|
"loss": 1.627472463962251, |
|
"step": 21696 |
|
}, |
|
{ |
|
"epoch": 32.33317976596333, |
|
"learning_rate": 2.6904761904761905e-05, |
|
"loss": 1.6053812482715708, |
|
"step": 21922 |
|
}, |
|
{ |
|
"epoch": 32.66635953192666, |
|
"learning_rate": 2.6666666666666667e-05, |
|
"loss": 1.610074878793902, |
|
"step": 22148 |
|
}, |
|
{ |
|
"epoch": 32.99953929788998, |
|
"learning_rate": 2.642857142857143e-05, |
|
"loss": 1.620673390616358, |
|
"step": 22374 |
|
}, |
|
{ |
|
"epoch": 33.33317976596333, |
|
"learning_rate": 2.6190476190476192e-05, |
|
"loss": 1.6081807634471792, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 33.66635953192666, |
|
"learning_rate": 2.5952380952380957e-05, |
|
"loss": 1.6156560847189574, |
|
"step": 22826 |
|
}, |
|
{ |
|
"epoch": 33.99953929788998, |
|
"learning_rate": 2.5714285714285714e-05, |
|
"loss": 1.6041984895689299, |
|
"step": 23052 |
|
}, |
|
{ |
|
"epoch": 34.33317976596333, |
|
"learning_rate": 2.5476190476190476e-05, |
|
"loss": 1.5947894881256914, |
|
"step": 23278 |
|
}, |
|
{ |
|
"epoch": 34.66635953192666, |
|
"learning_rate": 2.523809523809524e-05, |
|
"loss": 1.5988714167502074, |
|
"step": 23504 |
|
}, |
|
{ |
|
"epoch": 34.99953929788998, |
|
"learning_rate": 2.5e-05, |
|
"loss": 1.5926924173810841, |
|
"step": 23730 |
|
}, |
|
{ |
|
"epoch": 35.33317976596333, |
|
"learning_rate": 2.4761904761904762e-05, |
|
"loss": 1.5834472116115874, |
|
"step": 23956 |
|
}, |
|
{ |
|
"epoch": 35.66635953192666, |
|
"learning_rate": 2.4523809523809523e-05, |
|
"loss": 1.5848133458500415, |
|
"step": 24182 |
|
}, |
|
{ |
|
"epoch": 35.99953929788998, |
|
"learning_rate": 2.4285714285714288e-05, |
|
"loss": 1.602300593283324, |
|
"step": 24408 |
|
}, |
|
{ |
|
"epoch": 36.33317976596333, |
|
"learning_rate": 2.404761904761905e-05, |
|
"loss": 1.5835503772296737, |
|
"step": 24634 |
|
}, |
|
{ |
|
"epoch": 36.66635953192666, |
|
"learning_rate": 2.380952380952381e-05, |
|
"loss": 1.5831868669628042, |
|
"step": 24860 |
|
}, |
|
{ |
|
"epoch": 36.99953929788998, |
|
"learning_rate": 2.357142857142857e-05, |
|
"loss": 1.5691194787489628, |
|
"step": 25086 |
|
}, |
|
{ |
|
"epoch": 37.33317976596333, |
|
"learning_rate": 2.3333333333333336e-05, |
|
"loss": 1.568113208872027, |
|
"step": 25312 |
|
}, |
|
{ |
|
"epoch": 37.66635953192666, |
|
"learning_rate": 2.3095238095238097e-05, |
|
"loss": 1.5696247742239353, |
|
"step": 25538 |
|
}, |
|
{ |
|
"epoch": 37.99953929788998, |
|
"learning_rate": 2.2857142857142858e-05, |
|
"loss": 1.5706803313398783, |
|
"step": 25764 |
|
}, |
|
{ |
|
"epoch": 38.33317976596333, |
|
"learning_rate": 2.261904761904762e-05, |
|
"loss": 1.5539683114110896, |
|
"step": 25990 |
|
}, |
|
{ |
|
"epoch": 38.66635953192666, |
|
"learning_rate": 2.2380952380952384e-05, |
|
"loss": 1.5682053017405282, |
|
"step": 26216 |
|
}, |
|
{ |
|
"epoch": 38.99953929788998, |
|
"learning_rate": 2.214285714285714e-05, |
|
"loss": 1.5620073976769913, |
|
"step": 26442 |
|
}, |
|
{ |
|
"epoch": 39.33317976596333, |
|
"learning_rate": 2.1904761904761906e-05, |
|
"loss": 1.5591868172704646, |
|
"step": 26668 |
|
}, |
|
{ |
|
"epoch": 39.66635953192666, |
|
"learning_rate": 2.1666666666666667e-05, |
|
"loss": 1.5566102424554065, |
|
"step": 26894 |
|
}, |
|
{ |
|
"epoch": 39.99953929788998, |
|
"learning_rate": 2.1428571428571428e-05, |
|
"loss": 1.5528145849177268, |
|
"step": 27120 |
|
}, |
|
{ |
|
"epoch": 40.33317976596333, |
|
"learning_rate": 2.1190476190476193e-05, |
|
"loss": 1.5426600633469303, |
|
"step": 27346 |
|
}, |
|
{ |
|
"epoch": 40.66635953192666, |
|
"learning_rate": 2.0952380952380954e-05, |
|
"loss": 1.541890642284292, |
|
"step": 27572 |
|
}, |
|
{ |
|
"epoch": 40.99953929788998, |
|
"learning_rate": 2.0714285714285718e-05, |
|
"loss": 1.5517368823026134, |
|
"step": 27798 |
|
}, |
|
{ |
|
"epoch": 41.33317976596333, |
|
"learning_rate": 2.0476190476190476e-05, |
|
"loss": 1.540764057530766, |
|
"step": 28024 |
|
}, |
|
{ |
|
"epoch": 41.66635953192666, |
|
"learning_rate": 2.023809523809524e-05, |
|
"loss": 1.5214318469562362, |
|
"step": 28250 |
|
}, |
|
{ |
|
"epoch": 41.99953929788998, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5345232128042035, |
|
"step": 28476 |
|
}, |
|
{ |
|
"epoch": 42.33317976596333, |
|
"learning_rate": 1.9761904761904763e-05, |
|
"loss": 1.5242220448181691, |
|
"step": 28702 |
|
}, |
|
{ |
|
"epoch": 42.66635953192666, |
|
"learning_rate": 1.9523809523809524e-05, |
|
"loss": 1.535507067114906, |
|
"step": 28928 |
|
}, |
|
{ |
|
"epoch": 42.99953929788998, |
|
"learning_rate": 1.928571428571429e-05, |
|
"loss": 1.5329083468006774, |
|
"step": 29154 |
|
}, |
|
{ |
|
"epoch": 43.33317976596333, |
|
"learning_rate": 1.9047619047619046e-05, |
|
"loss": 1.5211832502246958, |
|
"step": 29380 |
|
}, |
|
{ |
|
"epoch": 43.66635953192666, |
|
"learning_rate": 1.880952380952381e-05, |
|
"loss": 1.5096026395274476, |
|
"step": 29606 |
|
}, |
|
{ |
|
"epoch": 43.99953929788998, |
|
"learning_rate": 1.8571428571428572e-05, |
|
"loss": 1.5239440107767561, |
|
"step": 29832 |
|
}, |
|
{ |
|
"epoch": 44.33317976596333, |
|
"learning_rate": 1.8333333333333333e-05, |
|
"loss": 1.518264635474281, |
|
"step": 30058 |
|
}, |
|
{ |
|
"epoch": 44.66635953192666, |
|
"learning_rate": 1.8095238095238094e-05, |
|
"loss": 1.5015280394427544, |
|
"step": 30284 |
|
}, |
|
{ |
|
"epoch": 44.99953929788998, |
|
"learning_rate": 1.785714285714286e-05, |
|
"loss": 1.5198267033669801, |
|
"step": 30510 |
|
}, |
|
{ |
|
"epoch": 45.33317976596333, |
|
"learning_rate": 1.761904761904762e-05, |
|
"loss": 1.5029577744745575, |
|
"step": 30736 |
|
}, |
|
{ |
|
"epoch": 45.66635953192666, |
|
"learning_rate": 1.738095238095238e-05, |
|
"loss": 1.5046313800642976, |
|
"step": 30962 |
|
}, |
|
{ |
|
"epoch": 45.99953929788998, |
|
"learning_rate": 1.7142857142857145e-05, |
|
"loss": 1.5148508527637583, |
|
"step": 31188 |
|
}, |
|
{ |
|
"epoch": 46.33317976596333, |
|
"learning_rate": 1.6904761904761906e-05, |
|
"loss": 1.4938382849229122, |
|
"step": 31414 |
|
}, |
|
{ |
|
"epoch": 46.66635953192666, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 1.5032175789892146, |
|
"step": 31640 |
|
}, |
|
{ |
|
"epoch": 46.99953929788998, |
|
"learning_rate": 1.642857142857143e-05, |
|
"loss": 1.4958131503214878, |
|
"step": 31866 |
|
}, |
|
{ |
|
"epoch": 47.33317976596333, |
|
"learning_rate": 1.6190476190476193e-05, |
|
"loss": 1.5038191533721654, |
|
"step": 32092 |
|
}, |
|
{ |
|
"epoch": 47.66635953192666, |
|
"learning_rate": 1.595238095238095e-05, |
|
"loss": 1.4897128755012445, |
|
"step": 32318 |
|
}, |
|
{ |
|
"epoch": 47.99953929788998, |
|
"learning_rate": 1.5714285714285715e-05, |
|
"loss": 1.4913623100888413, |
|
"step": 32544 |
|
}, |
|
{ |
|
"epoch": 48.33317976596333, |
|
"learning_rate": 1.5476190476190476e-05, |
|
"loss": 1.491287096411781, |
|
"step": 32770 |
|
}, |
|
{ |
|
"epoch": 48.66635953192666, |
|
"learning_rate": 1.5238095238095241e-05, |
|
"loss": 1.4894442938070382, |
|
"step": 32996 |
|
}, |
|
{ |
|
"epoch": 48.99953929788998, |
|
"learning_rate": 1.5e-05, |
|
"loss": 1.4941079468853706, |
|
"step": 33222 |
|
}, |
|
{ |
|
"epoch": 49.33317976596333, |
|
"learning_rate": 1.4761904761904763e-05, |
|
"loss": 1.494901538950152, |
|
"step": 33448 |
|
}, |
|
{ |
|
"epoch": 49.66635953192666, |
|
"learning_rate": 1.4523809523809526e-05, |
|
"loss": 1.4872477033496958, |
|
"step": 33674 |
|
}, |
|
{ |
|
"epoch": 49.99953929788998, |
|
"learning_rate": 1.4285714285714285e-05, |
|
"loss": 1.4863664745229535, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 50.33317976596333, |
|
"learning_rate": 1.4047619047619048e-05, |
|
"loss": 1.4825258508192753, |
|
"step": 34126 |
|
}, |
|
{ |
|
"epoch": 50.66635953192666, |
|
"learning_rate": 1.3809523809523811e-05, |
|
"loss": 1.4887811441337113, |
|
"step": 34352 |
|
}, |
|
{ |
|
"epoch": 50.99953929788998, |
|
"learning_rate": 1.357142857142857e-05, |
|
"loss": 1.475349223719234, |
|
"step": 34578 |
|
}, |
|
{ |
|
"epoch": 51.33317976596333, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 1.468778492075152, |
|
"step": 34804 |
|
}, |
|
{ |
|
"epoch": 51.66635953192666, |
|
"learning_rate": 1.3095238095238096e-05, |
|
"loss": 1.4681135869659154, |
|
"step": 35030 |
|
}, |
|
{ |
|
"epoch": 51.99953929788998, |
|
"learning_rate": 1.2857142857142857e-05, |
|
"loss": 1.4756152397763413, |
|
"step": 35256 |
|
}, |
|
{ |
|
"epoch": 52.33317976596333, |
|
"learning_rate": 1.261904761904762e-05, |
|
"loss": 1.461721504684043, |
|
"step": 35482 |
|
}, |
|
{ |
|
"epoch": 52.66635953192666, |
|
"learning_rate": 1.2380952380952381e-05, |
|
"loss": 1.472177421097207, |
|
"step": 35708 |
|
}, |
|
{ |
|
"epoch": 52.99953929788998, |
|
"learning_rate": 1.2142857142857144e-05, |
|
"loss": 1.470105331555932, |
|
"step": 35934 |
|
}, |
|
{ |
|
"epoch": 53.33317976596333, |
|
"learning_rate": 1.1904761904761905e-05, |
|
"loss": 1.463019582022608, |
|
"step": 36160 |
|
}, |
|
{ |
|
"epoch": 53.66635953192666, |
|
"learning_rate": 1.1666666666666668e-05, |
|
"loss": 1.4712693205976908, |
|
"step": 36386 |
|
}, |
|
{ |
|
"epoch": 53.99953929788998, |
|
"learning_rate": 1.1428571428571429e-05, |
|
"loss": 1.4639480725853844, |
|
"step": 36612 |
|
}, |
|
{ |
|
"epoch": 54.33317976596333, |
|
"learning_rate": 1.1190476190476192e-05, |
|
"loss": 1.4621197185685149, |
|
"step": 36838 |
|
}, |
|
{ |
|
"epoch": 54.66635953192666, |
|
"learning_rate": 1.0952380952380953e-05, |
|
"loss": 1.4438346930309736, |
|
"step": 37064 |
|
}, |
|
{ |
|
"epoch": 54.99953929788998, |
|
"learning_rate": 1.0714285714285714e-05, |
|
"loss": 1.45255799420112, |
|
"step": 37290 |
|
}, |
|
{ |
|
"epoch": 55.33317976596333, |
|
"learning_rate": 1.0476190476190477e-05, |
|
"loss": 1.4527645955043555, |
|
"step": 37516 |
|
}, |
|
{ |
|
"epoch": 55.66635953192666, |
|
"learning_rate": 1.0238095238095238e-05, |
|
"loss": 1.4556512073077987, |
|
"step": 37742 |
|
}, |
|
{ |
|
"epoch": 55.99953929788998, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4523168243138136, |
|
"step": 37968 |
|
}, |
|
{ |
|
"epoch": 56.33317976596333, |
|
"learning_rate": 9.761904761904762e-06, |
|
"loss": 1.4451588318411228, |
|
"step": 38194 |
|
}, |
|
{ |
|
"epoch": 56.66635953192666, |
|
"learning_rate": 9.523809523809523e-06, |
|
"loss": 1.4351428546736726, |
|
"step": 38420 |
|
}, |
|
{ |
|
"epoch": 56.99953929788998, |
|
"learning_rate": 9.285714285714286e-06, |
|
"loss": 1.4480798771951051, |
|
"step": 38646 |
|
}, |
|
{ |
|
"epoch": 57.33317976596333, |
|
"learning_rate": 9.047619047619047e-06, |
|
"loss": 1.4419262641299087, |
|
"step": 38872 |
|
}, |
|
{ |
|
"epoch": 57.66635953192666, |
|
"learning_rate": 8.80952380952381e-06, |
|
"loss": 1.4400379552250415, |
|
"step": 39098 |
|
}, |
|
{ |
|
"epoch": 57.99953929788998, |
|
"learning_rate": 8.571428571428573e-06, |
|
"loss": 1.4458791006982854, |
|
"step": 39324 |
|
}, |
|
{ |
|
"epoch": 58.33317976596333, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 1.4246890987970133, |
|
"step": 39550 |
|
}, |
|
{ |
|
"epoch": 58.66635953192666, |
|
"learning_rate": 8.095238095238097e-06, |
|
"loss": 1.4372091377730918, |
|
"step": 39776 |
|
}, |
|
{ |
|
"epoch": 58.99953929788998, |
|
"learning_rate": 7.857142857142858e-06, |
|
"loss": 1.4388618131654451, |
|
"step": 40002 |
|
}, |
|
{ |
|
"epoch": 59.33317976596333, |
|
"learning_rate": 7.6190476190476205e-06, |
|
"loss": 1.437955738168902, |
|
"step": 40228 |
|
}, |
|
{ |
|
"epoch": 59.66635953192666, |
|
"learning_rate": 7.380952380952382e-06, |
|
"loss": 1.4384045896276962, |
|
"step": 40454 |
|
}, |
|
{ |
|
"epoch": 59.99953929788998, |
|
"learning_rate": 7.142857142857143e-06, |
|
"loss": 1.4317560786694552, |
|
"step": 40680 |
|
}, |
|
{ |
|
"epoch": 60.33317976596333, |
|
"learning_rate": 6.9047619047619055e-06, |
|
"loss": 1.4312950741928236, |
|
"step": 40906 |
|
}, |
|
{ |
|
"epoch": 60.66635953192666, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 1.4349378737728153, |
|
"step": 41132 |
|
}, |
|
{ |
|
"epoch": 60.99953929788998, |
|
"learning_rate": 6.428571428571429e-06, |
|
"loss": 1.4232025146484375, |
|
"step": 41358 |
|
}, |
|
{ |
|
"epoch": 61.33317976596333, |
|
"learning_rate": 6.190476190476191e-06, |
|
"loss": 1.4273036180344303, |
|
"step": 41584 |
|
}, |
|
{ |
|
"epoch": 61.66635953192666, |
|
"learning_rate": 5.9523809523809525e-06, |
|
"loss": 1.437505671408324, |
|
"step": 41810 |
|
}, |
|
{ |
|
"epoch": 61.99953929788998, |
|
"learning_rate": 5.7142857142857145e-06, |
|
"loss": 1.4295697507605087, |
|
"step": 42036 |
|
}, |
|
{ |
|
"epoch": 62.33317976596333, |
|
"learning_rate": 5.4761904761904765e-06, |
|
"loss": 1.4301999522521434, |
|
"step": 42262 |
|
}, |
|
{ |
|
"epoch": 62.66635953192666, |
|
"learning_rate": 5.2380952380952384e-06, |
|
"loss": 1.4240087998651825, |
|
"step": 42488 |
|
}, |
|
{ |
|
"epoch": 62.99953929788998, |
|
"learning_rate": 5e-06, |
|
"loss": 1.418294991012168, |
|
"step": 42714 |
|
}, |
|
{ |
|
"epoch": 63.33317976596333, |
|
"learning_rate": 4.7619047619047615e-06, |
|
"loss": 1.4275296641662059, |
|
"step": 42940 |
|
}, |
|
{ |
|
"epoch": 63.66635953192666, |
|
"learning_rate": 4.5238095238095235e-06, |
|
"loss": 1.4242185069396434, |
|
"step": 43166 |
|
}, |
|
{ |
|
"epoch": 63.99953929788998, |
|
"learning_rate": 4.285714285714286e-06, |
|
"loss": 1.4242925053149198, |
|
"step": 43392 |
|
}, |
|
{ |
|
"epoch": 64.33317976596332, |
|
"learning_rate": 4.047619047619048e-06, |
|
"loss": 1.4117900206979397, |
|
"step": 43618 |
|
}, |
|
{ |
|
"epoch": 64.66635953192666, |
|
"learning_rate": 3.8095238095238102e-06, |
|
"loss": 1.4196827306156665, |
|
"step": 43844 |
|
}, |
|
{ |
|
"epoch": 64.99953929788998, |
|
"learning_rate": 3.5714285714285714e-06, |
|
"loss": 1.4127752253439574, |
|
"step": 44070 |
|
}, |
|
{ |
|
"epoch": 65.33317976596332, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 1.415105228930448, |
|
"step": 44296 |
|
}, |
|
{ |
|
"epoch": 65.66635953192666, |
|
"learning_rate": 3.0952380952380953e-06, |
|
"loss": 1.4221684278640072, |
|
"step": 44522 |
|
}, |
|
{ |
|
"epoch": 65.99953929788998, |
|
"learning_rate": 2.8571428571428573e-06, |
|
"loss": 1.4126794865701051, |
|
"step": 44748 |
|
}, |
|
{ |
|
"epoch": 66.33317976596332, |
|
"learning_rate": 2.6190476190476192e-06, |
|
"loss": 1.412820731644082, |
|
"step": 44974 |
|
}, |
|
{ |
|
"epoch": 66.66635953192666, |
|
"learning_rate": 2.3809523809523808e-06, |
|
"loss": 1.4091276995903623, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 66.99953929788998, |
|
"learning_rate": 2.142857142857143e-06, |
|
"loss": 1.413559094994469, |
|
"step": 45426 |
|
}, |
|
{ |
|
"epoch": 67.33317976596332, |
|
"learning_rate": 1.9047619047619051e-06, |
|
"loss": 1.4078961937828403, |
|
"step": 45652 |
|
}, |
|
{ |
|
"epoch": 67.66635953192666, |
|
"learning_rate": 1.6666666666666667e-06, |
|
"loss": 1.4104151092799364, |
|
"step": 45878 |
|
}, |
|
{ |
|
"epoch": 67.99953929788998, |
|
"learning_rate": 1.4285714285714286e-06, |
|
"loss": 1.4099604513792865, |
|
"step": 46104 |
|
}, |
|
{ |
|
"epoch": 68.33317976596332, |
|
"learning_rate": 1.1904761904761904e-06, |
|
"loss": 1.406501297402171, |
|
"step": 46330 |
|
}, |
|
{ |
|
"epoch": 68.66635953192666, |
|
"learning_rate": 9.523809523809526e-07, |
|
"loss": 1.4021470061445658, |
|
"step": 46556 |
|
}, |
|
{ |
|
"epoch": 68.99953929788998, |
|
"learning_rate": 7.142857142857143e-07, |
|
"loss": 1.4063275092470962, |
|
"step": 46782 |
|
} |
|
], |
|
"max_steps": 47460, |
|
"num_train_epochs": 70, |
|
"total_flos": 531450425497308624, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|