|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 185.1851851851852, |
|
"eval_steps": 500, |
|
"global_step": 3000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06172839506172839, |
|
"learning_rate": 5e-09, |
|
"loss": 8.604, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 1.2345679012345678, |
|
"learning_rate": 1e-07, |
|
"loss": 8.0873, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 2.4691358024691357, |
|
"learning_rate": 2e-07, |
|
"loss": 7.8074, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 3.7037037037037037, |
|
"learning_rate": 3e-07, |
|
"loss": 7.0763, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 4.938271604938271, |
|
"learning_rate": 4e-07, |
|
"loss": 6.2969, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 6.172839506172839, |
|
"learning_rate": 5e-07, |
|
"loss": 5.9334, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 7.407407407407407, |
|
"learning_rate": 6e-07, |
|
"loss": 5.4919, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 8.641975308641975, |
|
"learning_rate": 7e-07, |
|
"loss": 5.0659, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 9.876543209876543, |
|
"learning_rate": 8e-07, |
|
"loss": 4.9089, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 11.11111111111111, |
|
"learning_rate": 9e-07, |
|
"loss": 4.7144, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 12.345679012345679, |
|
"learning_rate": 1e-06, |
|
"loss": 4.7373, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 13.580246913580247, |
|
"learning_rate": 9.953798767967146e-07, |
|
"loss": 4.3848, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 14.814814814814815, |
|
"learning_rate": 9.90759753593429e-07, |
|
"loss": 4.3461, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 16.049382716049383, |
|
"learning_rate": 9.861396303901435e-07, |
|
"loss": 4.2226, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 17.28395061728395, |
|
"learning_rate": 9.815195071868584e-07, |
|
"loss": 4.1291, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 18.51851851851852, |
|
"learning_rate": 9.768993839835729e-07, |
|
"loss": 4.0118, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 19.753086419753085, |
|
"learning_rate": 9.722792607802873e-07, |
|
"loss": 4.0819, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 20.987654320987655, |
|
"learning_rate": 9.67659137577002e-07, |
|
"loss": 3.9431, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 22.22222222222222, |
|
"learning_rate": 9.630390143737167e-07, |
|
"loss": 3.8781, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 23.45679012345679, |
|
"learning_rate": 9.58418891170431e-07, |
|
"loss": 3.8416, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 24.691358024691358, |
|
"learning_rate": 9.537987679671456e-07, |
|
"loss": 3.744, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 25.925925925925927, |
|
"learning_rate": 9.491786447638603e-07, |
|
"loss": 3.7832, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 27.160493827160494, |
|
"learning_rate": 9.445585215605749e-07, |
|
"loss": 3.7876, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 28.395061728395063, |
|
"learning_rate": 9.399383983572896e-07, |
|
"loss": 3.6849, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 29.62962962962963, |
|
"learning_rate": 9.35318275154004e-07, |
|
"loss": 3.7099, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 30.864197530864196, |
|
"learning_rate": 9.306981519507186e-07, |
|
"loss": 3.7103, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 32.098765432098766, |
|
"learning_rate": 9.260780287474332e-07, |
|
"loss": 3.6856, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 33.333333333333336, |
|
"learning_rate": 9.214579055441478e-07, |
|
"loss": 3.625, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 34.5679012345679, |
|
"learning_rate": 9.168377823408624e-07, |
|
"loss": 3.5128, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 35.80246913580247, |
|
"learning_rate": 9.122176591375768e-07, |
|
"loss": 3.4695, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 37.03703703703704, |
|
"learning_rate": 9.075975359342916e-07, |
|
"loss": 3.5786, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 38.27160493827161, |
|
"learning_rate": 9.029774127310061e-07, |
|
"loss": 3.4751, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 39.50617283950617, |
|
"learning_rate": 8.983572895277207e-07, |
|
"loss": 3.4836, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 40.74074074074074, |
|
"learning_rate": 8.937371663244353e-07, |
|
"loss": 3.4209, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 41.97530864197531, |
|
"learning_rate": 8.891170431211498e-07, |
|
"loss": 3.3073, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 43.20987654320987, |
|
"learning_rate": 8.844969199178645e-07, |
|
"loss": 3.3712, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 44.44444444444444, |
|
"learning_rate": 8.79876796714579e-07, |
|
"loss": 3.3412, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 45.67901234567901, |
|
"learning_rate": 8.752566735112936e-07, |
|
"loss": 3.5548, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 46.91358024691358, |
|
"learning_rate": 8.706365503080082e-07, |
|
"loss": 3.5773, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 48.148148148148145, |
|
"learning_rate": 8.660164271047227e-07, |
|
"loss": 3.3696, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 49.382716049382715, |
|
"learning_rate": 8.613963039014373e-07, |
|
"loss": 3.2795, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 50.617283950617285, |
|
"learning_rate": 8.56776180698152e-07, |
|
"loss": 3.2955, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 51.851851851851855, |
|
"learning_rate": 8.521560574948665e-07, |
|
"loss": 3.2566, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 53.08641975308642, |
|
"learning_rate": 8.475359342915811e-07, |
|
"loss": 3.2109, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 54.32098765432099, |
|
"learning_rate": 8.429158110882956e-07, |
|
"loss": 3.1969, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 55.55555555555556, |
|
"learning_rate": 8.382956878850103e-07, |
|
"loss": 3.1772, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 56.79012345679013, |
|
"learning_rate": 8.336755646817249e-07, |
|
"loss": 3.175, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 58.02469135802469, |
|
"learning_rate": 8.290554414784393e-07, |
|
"loss": 3.1683, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 59.25925925925926, |
|
"learning_rate": 8.244353182751539e-07, |
|
"loss": 3.1107, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 60.49382716049383, |
|
"learning_rate": 8.198151950718686e-07, |
|
"loss": 3.1564, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 61.72839506172839, |
|
"learning_rate": 8.151950718685832e-07, |
|
"loss": 3.0278, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 62.96296296296296, |
|
"learning_rate": 8.105749486652977e-07, |
|
"loss": 3.0933, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 64.19753086419753, |
|
"learning_rate": 8.059548254620122e-07, |
|
"loss": 3.1135, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 65.4320987654321, |
|
"learning_rate": 8.013347022587269e-07, |
|
"loss": 3.1195, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 66.66666666666667, |
|
"learning_rate": 7.967145790554415e-07, |
|
"loss": 3.0757, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 67.90123456790124, |
|
"learning_rate": 7.92094455852156e-07, |
|
"loss": 2.9796, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 69.1358024691358, |
|
"learning_rate": 7.874743326488706e-07, |
|
"loss": 3.0387, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 70.37037037037037, |
|
"learning_rate": 7.828542094455851e-07, |
|
"loss": 2.9829, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 71.60493827160494, |
|
"learning_rate": 7.782340862422998e-07, |
|
"loss": 2.9699, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 72.8395061728395, |
|
"learning_rate": 7.736139630390144e-07, |
|
"loss": 3.0227, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 74.07407407407408, |
|
"learning_rate": 7.689938398357289e-07, |
|
"loss": 3.0769, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 75.30864197530865, |
|
"learning_rate": 7.643737166324435e-07, |
|
"loss": 2.9983, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 76.54320987654322, |
|
"learning_rate": 7.59753593429158e-07, |
|
"loss": 3.0172, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 77.77777777777777, |
|
"learning_rate": 7.551334702258726e-07, |
|
"loss": 2.8962, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 79.01234567901234, |
|
"learning_rate": 7.505133470225873e-07, |
|
"loss": 2.9221, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 80.24691358024691, |
|
"learning_rate": 7.458932238193018e-07, |
|
"loss": 2.9511, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 81.48148148148148, |
|
"learning_rate": 7.412731006160164e-07, |
|
"loss": 2.9408, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 82.71604938271605, |
|
"learning_rate": 7.36652977412731e-07, |
|
"loss": 2.9079, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 83.95061728395062, |
|
"learning_rate": 7.320328542094455e-07, |
|
"loss": 2.9289, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 85.18518518518519, |
|
"learning_rate": 7.274127310061602e-07, |
|
"loss": 2.9366, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 86.41975308641975, |
|
"learning_rate": 7.227926078028747e-07, |
|
"loss": 2.9584, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 87.65432098765432, |
|
"learning_rate": 7.181724845995892e-07, |
|
"loss": 2.8547, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 88.88888888888889, |
|
"learning_rate": 7.135523613963038e-07, |
|
"loss": 2.8381, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 90.12345679012346, |
|
"learning_rate": 7.089322381930185e-07, |
|
"loss": 2.8748, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 91.35802469135803, |
|
"learning_rate": 7.04312114989733e-07, |
|
"loss": 2.9128, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 92.5925925925926, |
|
"learning_rate": 6.996919917864475e-07, |
|
"loss": 2.8814, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 93.82716049382717, |
|
"learning_rate": 6.950718685831621e-07, |
|
"loss": 2.8452, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 95.06172839506173, |
|
"learning_rate": 6.904517453798768e-07, |
|
"loss": 2.8592, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 96.29629629629629, |
|
"learning_rate": 6.858316221765913e-07, |
|
"loss": 2.8163, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 97.53086419753086, |
|
"learning_rate": 6.812114989733059e-07, |
|
"loss": 2.786, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 98.76543209876543, |
|
"learning_rate": 6.765913757700205e-07, |
|
"loss": 2.8323, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 6.719712525667351e-07, |
|
"loss": 2.8326, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 101.23456790123457, |
|
"learning_rate": 6.673511293634497e-07, |
|
"loss": 2.8083, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 102.46913580246914, |
|
"learning_rate": 6.627310061601642e-07, |
|
"loss": 2.8271, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 103.70370370370371, |
|
"learning_rate": 6.581108829568788e-07, |
|
"loss": 2.8487, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 104.93827160493827, |
|
"learning_rate": 6.534907597535934e-07, |
|
"loss": 2.7786, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 106.17283950617283, |
|
"learning_rate": 6.488706365503079e-07, |
|
"loss": 2.8222, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 107.4074074074074, |
|
"learning_rate": 6.442505133470226e-07, |
|
"loss": 2.7864, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 108.64197530864197, |
|
"learning_rate": 6.39630390143737e-07, |
|
"loss": 2.7766, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 109.87654320987654, |
|
"learning_rate": 6.350102669404517e-07, |
|
"loss": 2.7741, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 111.11111111111111, |
|
"learning_rate": 6.303901437371663e-07, |
|
"loss": 2.8072, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 112.34567901234568, |
|
"learning_rate": 6.257700205338809e-07, |
|
"loss": 2.7551, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 113.58024691358025, |
|
"learning_rate": 6.211498973305955e-07, |
|
"loss": 2.7096, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 114.81481481481481, |
|
"learning_rate": 6.165297741273101e-07, |
|
"loss": 2.7817, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 116.04938271604938, |
|
"learning_rate": 6.119096509240245e-07, |
|
"loss": 2.7716, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 117.28395061728395, |
|
"learning_rate": 6.072895277207392e-07, |
|
"loss": 2.7476, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 118.51851851851852, |
|
"learning_rate": 6.026694045174538e-07, |
|
"loss": 2.758, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 119.75308641975309, |
|
"learning_rate": 5.980492813141684e-07, |
|
"loss": 2.7445, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 120.98765432098766, |
|
"learning_rate": 5.934291581108829e-07, |
|
"loss": 2.7234, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 122.22222222222223, |
|
"learning_rate": 5.888090349075973e-07, |
|
"loss": 2.7103, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 123.45679012345678, |
|
"learning_rate": 5.841889117043121e-07, |
|
"loss": 2.7609, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 124.69135802469135, |
|
"learning_rate": 5.795687885010268e-07, |
|
"loss": 2.7613, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 125.92592592592592, |
|
"learning_rate": 5.749486652977412e-07, |
|
"loss": 2.7793, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 127.1604938271605, |
|
"learning_rate": 5.703285420944558e-07, |
|
"loss": 2.6717, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 128.39506172839506, |
|
"learning_rate": 5.657084188911705e-07, |
|
"loss": 2.6988, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 129.62962962962962, |
|
"learning_rate": 5.61088295687885e-07, |
|
"loss": 2.7089, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 130.8641975308642, |
|
"learning_rate": 5.564681724845996e-07, |
|
"loss": 2.6745, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 132.09876543209876, |
|
"learning_rate": 5.518480492813141e-07, |
|
"loss": 2.6574, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 133.33333333333334, |
|
"learning_rate": 5.472279260780288e-07, |
|
"loss": 2.6682, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 134.5679012345679, |
|
"learning_rate": 5.426078028747432e-07, |
|
"loss": 2.7689, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 135.80246913580248, |
|
"learning_rate": 5.379876796714579e-07, |
|
"loss": 2.6941, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 137.03703703703704, |
|
"learning_rate": 5.333675564681725e-07, |
|
"loss": 2.692, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 138.2716049382716, |
|
"learning_rate": 5.28747433264887e-07, |
|
"loss": 2.6515, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 139.50617283950618, |
|
"learning_rate": 5.241273100616017e-07, |
|
"loss": 2.7289, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 140.74074074074073, |
|
"learning_rate": 5.195071868583161e-07, |
|
"loss": 2.6351, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 141.97530864197532, |
|
"learning_rate": 5.148870636550308e-07, |
|
"loss": 2.6695, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 143.20987654320987, |
|
"learning_rate": 5.102669404517453e-07, |
|
"loss": 2.6735, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 144.44444444444446, |
|
"learning_rate": 5.056468172484599e-07, |
|
"loss": 2.6271, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 145.679012345679, |
|
"learning_rate": 5.010266940451745e-07, |
|
"loss": 2.677, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 146.91358024691357, |
|
"learning_rate": 4.964065708418891e-07, |
|
"loss": 2.6575, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 148.14814814814815, |
|
"learning_rate": 4.917864476386037e-07, |
|
"loss": 2.5944, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 149.3827160493827, |
|
"learning_rate": 4.871663244353182e-07, |
|
"loss": 2.6355, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 150.6172839506173, |
|
"learning_rate": 4.825462012320328e-07, |
|
"loss": 2.6434, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 151.85185185185185, |
|
"learning_rate": 4.779260780287474e-07, |
|
"loss": 2.6251, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 153.08641975308643, |
|
"learning_rate": 4.73305954825462e-07, |
|
"loss": 2.6228, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 154.320987654321, |
|
"learning_rate": 4.686858316221766e-07, |
|
"loss": 2.6255, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 155.55555555555554, |
|
"learning_rate": 4.640657084188911e-07, |
|
"loss": 2.6125, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 156.79012345679013, |
|
"learning_rate": 4.594455852156057e-07, |
|
"loss": 2.6306, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 158.02469135802468, |
|
"learning_rate": 4.5482546201232026e-07, |
|
"loss": 2.6137, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 159.25925925925927, |
|
"learning_rate": 4.502053388090349e-07, |
|
"loss": 2.6427, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 160.49382716049382, |
|
"learning_rate": 4.4558521560574943e-07, |
|
"loss": 2.6137, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 161.7283950617284, |
|
"learning_rate": 4.409650924024641e-07, |
|
"loss": 2.6186, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 162.96296296296296, |
|
"learning_rate": 4.363449691991786e-07, |
|
"loss": 2.6397, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 164.19753086419752, |
|
"learning_rate": 4.3172484599589327e-07, |
|
"loss": 2.6389, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 165.4320987654321, |
|
"learning_rate": 4.271047227926078e-07, |
|
"loss": 2.5892, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 166.66666666666666, |
|
"learning_rate": 4.2248459958932234e-07, |
|
"loss": 2.6166, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 167.90123456790124, |
|
"learning_rate": 4.1786447638603695e-07, |
|
"loss": 2.5763, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 169.1358024691358, |
|
"learning_rate": 4.132443531827515e-07, |
|
"loss": 2.643, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 170.37037037037038, |
|
"learning_rate": 4.0862422997946613e-07, |
|
"loss": 2.5972, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 171.60493827160494, |
|
"learning_rate": 4.040041067761807e-07, |
|
"loss": 2.5463, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 172.8395061728395, |
|
"learning_rate": 3.993839835728953e-07, |
|
"loss": 2.6293, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 174.07407407407408, |
|
"learning_rate": 3.947638603696098e-07, |
|
"loss": 2.5722, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 175.30864197530863, |
|
"learning_rate": 3.9014373716632437e-07, |
|
"loss": 2.5601, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 176.54320987654322, |
|
"learning_rate": 3.85523613963039e-07, |
|
"loss": 2.5368, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 177.77777777777777, |
|
"learning_rate": 3.8090349075975354e-07, |
|
"loss": 2.5328, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 179.01234567901236, |
|
"learning_rate": 3.7628336755646816e-07, |
|
"loss": 2.5885, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 180.2469135802469, |
|
"learning_rate": 3.716632443531827e-07, |
|
"loss": 2.5708, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 181.4814814814815, |
|
"learning_rate": 3.6704312114989733e-07, |
|
"loss": 2.548, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 182.71604938271605, |
|
"learning_rate": 3.624229979466119e-07, |
|
"loss": 2.5854, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 183.9506172839506, |
|
"learning_rate": 3.578028747433265e-07, |
|
"loss": 2.5578, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 185.1851851851852, |
|
"learning_rate": 3.5318275154004107e-07, |
|
"loss": 2.6103, |
|
"step": 3000 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 4096, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 256, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8772602674397184.0, |
|
"train_batch_size": 5, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|