{ "best_metric": null, "best_model_checkpoint": null, "epoch": 68.99953929788998, "global_step": 46782, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.3331797659633281, "learning_rate": 4.976190476190477e-05, "loss": 2.685059944085315, "step": 226 }, { "epoch": 0.6663595319266562, "learning_rate": 4.9523809523809525e-05, "loss": 2.5215273156630253, "step": 452 }, { "epoch": 0.9995392978899843, "learning_rate": 4.928571428571429e-05, "loss": 2.449192013360758, "step": 678 }, { "epoch": 1.3331797659633282, "learning_rate": 4.904761904761905e-05, "loss": 2.3792454542311945, "step": 904 }, { "epoch": 1.6663595319266562, "learning_rate": 4.880952380952381e-05, "loss": 2.3559764760785398, "step": 1130 }, { "epoch": 1.9995392978899842, "learning_rate": 4.8571428571428576e-05, "loss": 2.328374001832135, "step": 1356 }, { "epoch": 2.333179765963328, "learning_rate": 4.8333333333333334e-05, "loss": 2.2863990986241705, "step": 1582 }, { "epoch": 2.6663595319266564, "learning_rate": 4.80952380952381e-05, "loss": 2.2656879256256914, "step": 1808 }, { "epoch": 2.999539297889984, "learning_rate": 4.785714285714286e-05, "loss": 2.245354711481955, "step": 2034 }, { "epoch": 3.333179765963328, "learning_rate": 4.761904761904762e-05, "loss": 2.2154734113575083, "step": 2260 }, { "epoch": 3.6663595319266564, "learning_rate": 4.738095238095238e-05, "loss": 2.19904536694552, "step": 2486 }, { "epoch": 3.999539297889984, "learning_rate": 4.714285714285714e-05, "loss": 2.185555112045423, "step": 2712 }, { "epoch": 4.333179765963328, "learning_rate": 4.690476190476191e-05, "loss": 2.162922816993916, "step": 2938 }, { "epoch": 4.666359531926656, "learning_rate": 4.666666666666667e-05, "loss": 2.1419683135716263, "step": 3164 }, { "epoch": 4.999539297889984, "learning_rate": 4.642857142857143e-05, "loss": 2.1468805633814987, "step": 3390 }, { "epoch": 5.333179765963328, "learning_rate": 4.6190476190476194e-05, "loss": 2.114357872346861, "step": 3616 }, { "epoch": 5.666359531926656, "learning_rate": 4.595238095238095e-05, "loss": 2.1005791858234235, "step": 3842 }, { "epoch": 5.999539297889984, "learning_rate": 4.5714285714285716e-05, "loss": 2.0986059408272264, "step": 4068 }, { "epoch": 6.333179765963328, "learning_rate": 4.547619047619048e-05, "loss": 2.063803073579231, "step": 4294 }, { "epoch": 6.666359531926656, "learning_rate": 4.523809523809524e-05, "loss": 2.0596065014864493, "step": 4520 }, { "epoch": 6.999539297889984, "learning_rate": 4.5e-05, "loss": 2.0634367276081997, "step": 4746 }, { "epoch": 7.333179765963328, "learning_rate": 4.476190476190477e-05, "loss": 2.049277921693515, "step": 4972 }, { "epoch": 7.666359531926656, "learning_rate": 4.4523809523809525e-05, "loss": 2.0317085738730642, "step": 5198 }, { "epoch": 7.999539297889984, "learning_rate": 4.428571428571428e-05, "loss": 2.0282083697023645, "step": 5424 }, { "epoch": 8.333179765963328, "learning_rate": 4.404761904761905e-05, "loss": 1.9894960116496128, "step": 5650 }, { "epoch": 8.666359531926656, "learning_rate": 4.380952380952381e-05, "loss": 1.996843321133504, "step": 5876 }, { "epoch": 8.999539297889985, "learning_rate": 4.3571428571428576e-05, "loss": 2.0045918152395603, "step": 6102 }, { "epoch": 9.333179765963328, "learning_rate": 4.3333333333333334e-05, "loss": 1.966206238333103, "step": 6328 }, { "epoch": 9.666359531926656, "learning_rate": 4.30952380952381e-05, "loss": 1.9711824940369191, "step": 6554 }, { "epoch": 9.999539297889985, "learning_rate": 4.2857142857142856e-05, "loss": 1.9805989223243916, "step": 6780 }, { "epoch": 10.333179765963328, "learning_rate": 4.261904761904762e-05, "loss": 1.9500477208500415, "step": 7006 }, { "epoch": 10.666359531926656, "learning_rate": 4.2380952380952385e-05, "loss": 1.9578322689090155, "step": 7232 }, { "epoch": 10.999539297889985, "learning_rate": 4.214285714285714e-05, "loss": 1.9484224572645878, "step": 7458 }, { "epoch": 11.333179765963328, "learning_rate": 4.190476190476191e-05, "loss": 1.9283131287161228, "step": 7684 }, { "epoch": 11.666359531926656, "learning_rate": 4.166666666666667e-05, "loss": 1.9156345603740321, "step": 7910 }, { "epoch": 11.999539297889985, "learning_rate": 4.1428571428571437e-05, "loss": 1.926576732534223, "step": 8136 }, { "epoch": 12.333179765963328, "learning_rate": 4.119047619047619e-05, "loss": 1.907513238687431, "step": 8362 }, { "epoch": 12.666359531926656, "learning_rate": 4.095238095238095e-05, "loss": 1.9078028856125553, "step": 8588 }, { "epoch": 12.999539297889985, "learning_rate": 4.0714285714285717e-05, "loss": 1.90263299182453, "step": 8814 }, { "epoch": 13.333179765963328, "learning_rate": 4.047619047619048e-05, "loss": 1.8844813794161366, "step": 9040 }, { "epoch": 13.666359531926656, "learning_rate": 4.023809523809524e-05, "loss": 1.8840125429946764, "step": 9266 }, { "epoch": 13.999539297889985, "learning_rate": 4e-05, "loss": 1.8717386701465708, "step": 9492 }, { "epoch": 14.333179765963328, "learning_rate": 3.976190476190476e-05, "loss": 1.858954910683421, "step": 9718 }, { "epoch": 14.666359531926656, "learning_rate": 3.9523809523809526e-05, "loss": 1.8676287895810288, "step": 9944 }, { "epoch": 14.999539297889985, "learning_rate": 3.928571428571429e-05, "loss": 1.8678895393304065, "step": 10170 }, { "epoch": 15.333179765963328, "learning_rate": 3.904761904761905e-05, "loss": 1.8486336227011892, "step": 10396 }, { "epoch": 15.666359531926656, "learning_rate": 3.880952380952381e-05, "loss": 1.8404835388723728, "step": 10622 }, { "epoch": 15.999539297889985, "learning_rate": 3.857142857142858e-05, "loss": 1.8481951688243226, "step": 10848 }, { "epoch": 16.33317976596333, "learning_rate": 3.8333333333333334e-05, "loss": 1.8267865476355087, "step": 11074 }, { "epoch": 16.666359531926656, "learning_rate": 3.809523809523809e-05, "loss": 1.818214213953609, "step": 11300 }, { "epoch": 16.999539297889985, "learning_rate": 3.785714285714286e-05, "loss": 1.8284451645032493, "step": 11526 }, { "epoch": 17.33317976596333, "learning_rate": 3.761904761904762e-05, "loss": 1.8039584539632882, "step": 11752 }, { "epoch": 17.666359531926656, "learning_rate": 3.7380952380952386e-05, "loss": 1.8007052261217507, "step": 11978 }, { "epoch": 17.999539297889985, "learning_rate": 3.7142857142857143e-05, "loss": 1.800786381274198, "step": 12204 }, { "epoch": 18.33317976596333, "learning_rate": 3.690476190476191e-05, "loss": 1.7864516266679342, "step": 12430 }, { "epoch": 18.666359531926656, "learning_rate": 3.6666666666666666e-05, "loss": 1.790079842626521, "step": 12656 }, { "epoch": 18.999539297889985, "learning_rate": 3.642857142857143e-05, "loss": 1.7918755185287611, "step": 12882 }, { "epoch": 19.33317976596333, "learning_rate": 3.619047619047619e-05, "loss": 1.7703251121318446, "step": 13108 }, { "epoch": 19.666359531926656, "learning_rate": 3.595238095238095e-05, "loss": 1.7819123394721377, "step": 13334 }, { "epoch": 19.999539297889985, "learning_rate": 3.571428571428572e-05, "loss": 1.7857482370022124, "step": 13560 }, { "epoch": 20.33317976596333, "learning_rate": 3.547619047619048e-05, "loss": 1.7561523977634126, "step": 13786 }, { "epoch": 20.666359531926656, "learning_rate": 3.523809523809524e-05, "loss": 1.7562858108925608, "step": 14012 }, { "epoch": 20.999539297889985, "learning_rate": 3.5e-05, "loss": 1.767443496569068, "step": 14238 }, { "epoch": 21.33317976596333, "learning_rate": 3.476190476190476e-05, "loss": 1.7421298406820382, "step": 14464 }, { "epoch": 21.666359531926656, "learning_rate": 3.4523809523809526e-05, "loss": 1.7460298791395878, "step": 14690 }, { "epoch": 21.999539297889985, "learning_rate": 3.428571428571429e-05, "loss": 1.7635893695122373, "step": 14916 }, { "epoch": 22.33317976596333, "learning_rate": 3.404761904761905e-05, "loss": 1.7318757993985066, "step": 15142 }, { "epoch": 22.666359531926656, "learning_rate": 3.380952380952381e-05, "loss": 1.7320329784292035, "step": 15368 }, { "epoch": 22.999539297889985, "learning_rate": 3.357142857142857e-05, "loss": 1.7314567903501799, "step": 15594 }, { "epoch": 23.33317976596333, "learning_rate": 3.3333333333333335e-05, "loss": 1.7091308863817063, "step": 15820 }, { "epoch": 23.666359531926656, "learning_rate": 3.309523809523809e-05, "loss": 1.7127776019341123, "step": 16046 }, { "epoch": 23.999539297889985, "learning_rate": 3.285714285714286e-05, "loss": 1.714874267578125, "step": 16272 }, { "epoch": 24.33317976596333, "learning_rate": 3.261904761904762e-05, "loss": 1.7092985980278623, "step": 16498 }, { "epoch": 24.666359531926656, "learning_rate": 3.2380952380952386e-05, "loss": 1.7022116331927544, "step": 16724 }, { "epoch": 24.999539297889985, "learning_rate": 3.2142857142857144e-05, "loss": 1.7083171743207273, "step": 16950 }, { "epoch": 25.33317976596333, "learning_rate": 3.19047619047619e-05, "loss": 1.6961614558127074, "step": 17176 }, { "epoch": 25.666359531926656, "learning_rate": 3.1666666666666666e-05, "loss": 1.6968883413129148, "step": 17402 }, { "epoch": 25.999539297889985, "learning_rate": 3.142857142857143e-05, "loss": 1.6995788844285813, "step": 17628 }, { "epoch": 26.33317976596333, "learning_rate": 3.1190476190476195e-05, "loss": 1.6844244762859513, "step": 17854 }, { "epoch": 26.666359531926656, "learning_rate": 3.095238095238095e-05, "loss": 1.6839947995886337, "step": 18080 }, { "epoch": 26.999539297889985, "learning_rate": 3.071428571428572e-05, "loss": 1.6868854623980227, "step": 18306 }, { "epoch": 27.33317976596333, "learning_rate": 3.0476190476190482e-05, "loss": 1.6642917869365321, "step": 18532 }, { "epoch": 27.666359531926656, "learning_rate": 3.0238095238095236e-05, "loss": 1.6676389981160122, "step": 18758 }, { "epoch": 27.999539297889985, "learning_rate": 3e-05, "loss": 1.6597493939695105, "step": 18984 }, { "epoch": 28.33317976596333, "learning_rate": 2.9761904761904762e-05, "loss": 1.652435842868501, "step": 19210 }, { "epoch": 28.666359531926656, "learning_rate": 2.9523809523809526e-05, "loss": 1.6600899485360205, "step": 19436 }, { "epoch": 28.999539297889985, "learning_rate": 2.9285714285714288e-05, "loss": 1.6490822851130393, "step": 19662 }, { "epoch": 29.33317976596333, "learning_rate": 2.9047619047619052e-05, "loss": 1.6511269628474143, "step": 19888 }, { "epoch": 29.666359531926656, "learning_rate": 2.880952380952381e-05, "loss": 1.6514149893701604, "step": 20114 }, { "epoch": 29.999539297889985, "learning_rate": 2.857142857142857e-05, "loss": 1.6452392308057937, "step": 20340 }, { "epoch": 30.33317976596333, "learning_rate": 2.8333333333333335e-05, "loss": 1.6428464366271434, "step": 20566 }, { "epoch": 30.666359531926656, "learning_rate": 2.8095238095238096e-05, "loss": 1.632520692538371, "step": 20792 }, { "epoch": 30.999539297889985, "learning_rate": 2.785714285714286e-05, "loss": 1.6398790747718472, "step": 21018 }, { "epoch": 31.33317976596333, "learning_rate": 2.7619047619047622e-05, "loss": 1.6239518697282909, "step": 21244 }, { "epoch": 31.666359531926656, "learning_rate": 2.7380952380952383e-05, "loss": 1.6242760852374862, "step": 21470 }, { "epoch": 31.999539297889985, "learning_rate": 2.714285714285714e-05, "loss": 1.627472463962251, "step": 21696 }, { "epoch": 32.33317976596333, "learning_rate": 2.6904761904761905e-05, "loss": 1.6053812482715708, "step": 21922 }, { "epoch": 32.66635953192666, "learning_rate": 2.6666666666666667e-05, "loss": 1.610074878793902, "step": 22148 }, { "epoch": 32.99953929788998, "learning_rate": 2.642857142857143e-05, "loss": 1.620673390616358, "step": 22374 }, { "epoch": 33.33317976596333, "learning_rate": 2.6190476190476192e-05, "loss": 1.6081807634471792, "step": 22600 }, { "epoch": 33.66635953192666, "learning_rate": 2.5952380952380957e-05, "loss": 1.6156560847189574, "step": 22826 }, { "epoch": 33.99953929788998, "learning_rate": 2.5714285714285714e-05, "loss": 1.6041984895689299, "step": 23052 }, { "epoch": 34.33317976596333, "learning_rate": 2.5476190476190476e-05, "loss": 1.5947894881256914, "step": 23278 }, { "epoch": 34.66635953192666, "learning_rate": 2.523809523809524e-05, "loss": 1.5988714167502074, "step": 23504 }, { "epoch": 34.99953929788998, "learning_rate": 2.5e-05, "loss": 1.5926924173810841, "step": 23730 }, { "epoch": 35.33317976596333, "learning_rate": 2.4761904761904762e-05, "loss": 1.5834472116115874, "step": 23956 }, { "epoch": 35.66635953192666, "learning_rate": 2.4523809523809523e-05, "loss": 1.5848133458500415, "step": 24182 }, { "epoch": 35.99953929788998, "learning_rate": 2.4285714285714288e-05, "loss": 1.602300593283324, "step": 24408 }, { "epoch": 36.33317976596333, "learning_rate": 2.404761904761905e-05, "loss": 1.5835503772296737, "step": 24634 }, { "epoch": 36.66635953192666, "learning_rate": 2.380952380952381e-05, "loss": 1.5831868669628042, "step": 24860 }, { "epoch": 36.99953929788998, "learning_rate": 2.357142857142857e-05, "loss": 1.5691194787489628, "step": 25086 }, { "epoch": 37.33317976596333, "learning_rate": 2.3333333333333336e-05, "loss": 1.568113208872027, "step": 25312 }, { "epoch": 37.66635953192666, "learning_rate": 2.3095238095238097e-05, "loss": 1.5696247742239353, "step": 25538 }, { "epoch": 37.99953929788998, "learning_rate": 2.2857142857142858e-05, "loss": 1.5706803313398783, "step": 25764 }, { "epoch": 38.33317976596333, "learning_rate": 2.261904761904762e-05, "loss": 1.5539683114110896, "step": 25990 }, { "epoch": 38.66635953192666, "learning_rate": 2.2380952380952384e-05, "loss": 1.5682053017405282, "step": 26216 }, { "epoch": 38.99953929788998, "learning_rate": 2.214285714285714e-05, "loss": 1.5620073976769913, "step": 26442 }, { "epoch": 39.33317976596333, "learning_rate": 2.1904761904761906e-05, "loss": 1.5591868172704646, "step": 26668 }, { "epoch": 39.66635953192666, "learning_rate": 2.1666666666666667e-05, "loss": 1.5566102424554065, "step": 26894 }, { "epoch": 39.99953929788998, "learning_rate": 2.1428571428571428e-05, "loss": 1.5528145849177268, "step": 27120 }, { "epoch": 40.33317976596333, "learning_rate": 2.1190476190476193e-05, "loss": 1.5426600633469303, "step": 27346 }, { "epoch": 40.66635953192666, "learning_rate": 2.0952380952380954e-05, "loss": 1.541890642284292, "step": 27572 }, { "epoch": 40.99953929788998, "learning_rate": 2.0714285714285718e-05, "loss": 1.5517368823026134, "step": 27798 }, { "epoch": 41.33317976596333, "learning_rate": 2.0476190476190476e-05, "loss": 1.540764057530766, "step": 28024 }, { "epoch": 41.66635953192666, "learning_rate": 2.023809523809524e-05, "loss": 1.5214318469562362, "step": 28250 }, { "epoch": 41.99953929788998, "learning_rate": 2e-05, "loss": 1.5345232128042035, "step": 28476 }, { "epoch": 42.33317976596333, "learning_rate": 1.9761904761904763e-05, "loss": 1.5242220448181691, "step": 28702 }, { "epoch": 42.66635953192666, "learning_rate": 1.9523809523809524e-05, "loss": 1.535507067114906, "step": 28928 }, { "epoch": 42.99953929788998, "learning_rate": 1.928571428571429e-05, "loss": 1.5329083468006774, "step": 29154 }, { "epoch": 43.33317976596333, "learning_rate": 1.9047619047619046e-05, "loss": 1.5211832502246958, "step": 29380 }, { "epoch": 43.66635953192666, "learning_rate": 1.880952380952381e-05, "loss": 1.5096026395274476, "step": 29606 }, { "epoch": 43.99953929788998, "learning_rate": 1.8571428571428572e-05, "loss": 1.5239440107767561, "step": 29832 }, { "epoch": 44.33317976596333, "learning_rate": 1.8333333333333333e-05, "loss": 1.518264635474281, "step": 30058 }, { "epoch": 44.66635953192666, "learning_rate": 1.8095238095238094e-05, "loss": 1.5015280394427544, "step": 30284 }, { "epoch": 44.99953929788998, "learning_rate": 1.785714285714286e-05, "loss": 1.5198267033669801, "step": 30510 }, { "epoch": 45.33317976596333, "learning_rate": 1.761904761904762e-05, "loss": 1.5029577744745575, "step": 30736 }, { "epoch": 45.66635953192666, "learning_rate": 1.738095238095238e-05, "loss": 1.5046313800642976, "step": 30962 }, { "epoch": 45.99953929788998, "learning_rate": 1.7142857142857145e-05, "loss": 1.5148508527637583, "step": 31188 }, { "epoch": 46.33317976596333, "learning_rate": 1.6904761904761906e-05, "loss": 1.4938382849229122, "step": 31414 }, { "epoch": 46.66635953192666, "learning_rate": 1.6666666666666667e-05, "loss": 1.5032175789892146, "step": 31640 }, { "epoch": 46.99953929788998, "learning_rate": 1.642857142857143e-05, "loss": 1.4958131503214878, "step": 31866 }, { "epoch": 47.33317976596333, "learning_rate": 1.6190476190476193e-05, "loss": 1.5038191533721654, "step": 32092 }, { "epoch": 47.66635953192666, "learning_rate": 1.595238095238095e-05, "loss": 1.4897128755012445, "step": 32318 }, { "epoch": 47.99953929788998, "learning_rate": 1.5714285714285715e-05, "loss": 1.4913623100888413, "step": 32544 }, { "epoch": 48.33317976596333, "learning_rate": 1.5476190476190476e-05, "loss": 1.491287096411781, "step": 32770 }, { "epoch": 48.66635953192666, "learning_rate": 1.5238095238095241e-05, "loss": 1.4894442938070382, "step": 32996 }, { "epoch": 48.99953929788998, "learning_rate": 1.5e-05, "loss": 1.4941079468853706, "step": 33222 }, { "epoch": 49.33317976596333, "learning_rate": 1.4761904761904763e-05, "loss": 1.494901538950152, "step": 33448 }, { "epoch": 49.66635953192666, "learning_rate": 1.4523809523809526e-05, "loss": 1.4872477033496958, "step": 33674 }, { "epoch": 49.99953929788998, "learning_rate": 1.4285714285714285e-05, "loss": 1.4863664745229535, "step": 33900 }, { "epoch": 50.33317976596333, "learning_rate": 1.4047619047619048e-05, "loss": 1.4825258508192753, "step": 34126 }, { "epoch": 50.66635953192666, "learning_rate": 1.3809523809523811e-05, "loss": 1.4887811441337113, "step": 34352 }, { "epoch": 50.99953929788998, "learning_rate": 1.357142857142857e-05, "loss": 1.475349223719234, "step": 34578 }, { "epoch": 51.33317976596333, "learning_rate": 1.3333333333333333e-05, "loss": 1.468778492075152, "step": 34804 }, { "epoch": 51.66635953192666, "learning_rate": 1.3095238095238096e-05, "loss": 1.4681135869659154, "step": 35030 }, { "epoch": 51.99953929788998, "learning_rate": 1.2857142857142857e-05, "loss": 1.4756152397763413, "step": 35256 }, { "epoch": 52.33317976596333, "learning_rate": 1.261904761904762e-05, "loss": 1.461721504684043, "step": 35482 }, { "epoch": 52.66635953192666, "learning_rate": 1.2380952380952381e-05, "loss": 1.472177421097207, "step": 35708 }, { "epoch": 52.99953929788998, "learning_rate": 1.2142857142857144e-05, "loss": 1.470105331555932, "step": 35934 }, { "epoch": 53.33317976596333, "learning_rate": 1.1904761904761905e-05, "loss": 1.463019582022608, "step": 36160 }, { "epoch": 53.66635953192666, "learning_rate": 1.1666666666666668e-05, "loss": 1.4712693205976908, "step": 36386 }, { "epoch": 53.99953929788998, "learning_rate": 1.1428571428571429e-05, "loss": 1.4639480725853844, "step": 36612 }, { "epoch": 54.33317976596333, "learning_rate": 1.1190476190476192e-05, "loss": 1.4621197185685149, "step": 36838 }, { "epoch": 54.66635953192666, "learning_rate": 1.0952380952380953e-05, "loss": 1.4438346930309736, "step": 37064 }, { "epoch": 54.99953929788998, "learning_rate": 1.0714285714285714e-05, "loss": 1.45255799420112, "step": 37290 }, { "epoch": 55.33317976596333, "learning_rate": 1.0476190476190477e-05, "loss": 1.4527645955043555, "step": 37516 }, { "epoch": 55.66635953192666, "learning_rate": 1.0238095238095238e-05, "loss": 1.4556512073077987, "step": 37742 }, { "epoch": 55.99953929788998, "learning_rate": 1e-05, "loss": 1.4523168243138136, "step": 37968 }, { "epoch": 56.33317976596333, "learning_rate": 9.761904761904762e-06, "loss": 1.4451588318411228, "step": 38194 }, { "epoch": 56.66635953192666, "learning_rate": 9.523809523809523e-06, "loss": 1.4351428546736726, "step": 38420 }, { "epoch": 56.99953929788998, "learning_rate": 9.285714285714286e-06, "loss": 1.4480798771951051, "step": 38646 }, { "epoch": 57.33317976596333, "learning_rate": 9.047619047619047e-06, "loss": 1.4419262641299087, "step": 38872 }, { "epoch": 57.66635953192666, "learning_rate": 8.80952380952381e-06, "loss": 1.4400379552250415, "step": 39098 }, { "epoch": 57.99953929788998, "learning_rate": 8.571428571428573e-06, "loss": 1.4458791006982854, "step": 39324 }, { "epoch": 58.33317976596333, "learning_rate": 8.333333333333334e-06, "loss": 1.4246890987970133, "step": 39550 }, { "epoch": 58.66635953192666, "learning_rate": 8.095238095238097e-06, "loss": 1.4372091377730918, "step": 39776 }, { "epoch": 58.99953929788998, "learning_rate": 7.857142857142858e-06, "loss": 1.4388618131654451, "step": 40002 }, { "epoch": 59.33317976596333, "learning_rate": 7.6190476190476205e-06, "loss": 1.437955738168902, "step": 40228 }, { "epoch": 59.66635953192666, "learning_rate": 7.380952380952382e-06, "loss": 1.4384045896276962, "step": 40454 }, { "epoch": 59.99953929788998, "learning_rate": 7.142857142857143e-06, "loss": 1.4317560786694552, "step": 40680 }, { "epoch": 60.33317976596333, "learning_rate": 6.9047619047619055e-06, "loss": 1.4312950741928236, "step": 40906 }, { "epoch": 60.66635953192666, "learning_rate": 6.666666666666667e-06, "loss": 1.4349378737728153, "step": 41132 }, { "epoch": 60.99953929788998, "learning_rate": 6.428571428571429e-06, "loss": 1.4232025146484375, "step": 41358 }, { "epoch": 61.33317976596333, "learning_rate": 6.190476190476191e-06, "loss": 1.4273036180344303, "step": 41584 }, { "epoch": 61.66635953192666, "learning_rate": 5.9523809523809525e-06, "loss": 1.437505671408324, "step": 41810 }, { "epoch": 61.99953929788998, "learning_rate": 5.7142857142857145e-06, "loss": 1.4295697507605087, "step": 42036 }, { "epoch": 62.33317976596333, "learning_rate": 5.4761904761904765e-06, "loss": 1.4301999522521434, "step": 42262 }, { "epoch": 62.66635953192666, "learning_rate": 5.2380952380952384e-06, "loss": 1.4240087998651825, "step": 42488 }, { "epoch": 62.99953929788998, "learning_rate": 5e-06, "loss": 1.418294991012168, "step": 42714 }, { "epoch": 63.33317976596333, "learning_rate": 4.7619047619047615e-06, "loss": 1.4275296641662059, "step": 42940 }, { "epoch": 63.66635953192666, "learning_rate": 4.5238095238095235e-06, "loss": 1.4242185069396434, "step": 43166 }, { "epoch": 63.99953929788998, "learning_rate": 4.285714285714286e-06, "loss": 1.4242925053149198, "step": 43392 }, { "epoch": 64.33317976596332, "learning_rate": 4.047619047619048e-06, "loss": 1.4117900206979397, "step": 43618 }, { "epoch": 64.66635953192666, "learning_rate": 3.8095238095238102e-06, "loss": 1.4196827306156665, "step": 43844 }, { "epoch": 64.99953929788998, "learning_rate": 3.5714285714285714e-06, "loss": 1.4127752253439574, "step": 44070 }, { "epoch": 65.33317976596332, "learning_rate": 3.3333333333333333e-06, "loss": 1.415105228930448, "step": 44296 }, { "epoch": 65.66635953192666, "learning_rate": 3.0952380952380953e-06, "loss": 1.4221684278640072, "step": 44522 }, { "epoch": 65.99953929788998, "learning_rate": 2.8571428571428573e-06, "loss": 1.4126794865701051, "step": 44748 }, { "epoch": 66.33317976596332, "learning_rate": 2.6190476190476192e-06, "loss": 1.412820731644082, "step": 44974 }, { "epoch": 66.66635953192666, "learning_rate": 2.3809523809523808e-06, "loss": 1.4091276995903623, "step": 45200 }, { "epoch": 66.99953929788998, "learning_rate": 2.142857142857143e-06, "loss": 1.413559094994469, "step": 45426 }, { "epoch": 67.33317976596332, "learning_rate": 1.9047619047619051e-06, "loss": 1.4078961937828403, "step": 45652 }, { "epoch": 67.66635953192666, "learning_rate": 1.6666666666666667e-06, "loss": 1.4104151092799364, "step": 45878 }, { "epoch": 67.99953929788998, "learning_rate": 1.4285714285714286e-06, "loss": 1.4099604513792865, "step": 46104 }, { "epoch": 68.33317976596332, "learning_rate": 1.1904761904761904e-06, "loss": 1.406501297402171, "step": 46330 }, { "epoch": 68.66635953192666, "learning_rate": 9.523809523809526e-07, "loss": 1.4021470061445658, "step": 46556 }, { "epoch": 68.99953929788998, "learning_rate": 7.142857142857143e-07, "loss": 1.4063275092470962, "step": 46782 } ], "max_steps": 47460, "num_train_epochs": 70, "total_flos": 531450425497308624, "trial_name": null, "trial_params": null }