{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9999949945189983, "global_step": 299670, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 5.989988987886675e-05, "loss": 3.7278, "step": 500 }, { "epoch": 0.01, "learning_rate": 5.979977975773351e-05, "loss": 3.7216, "step": 1000 }, { "epoch": 0.02, "learning_rate": 5.9699669636600265e-05, "loss": 3.715, "step": 1500 }, { "epoch": 0.02, "learning_rate": 5.9599559515467015e-05, "loss": 3.7158, "step": 2000 }, { "epoch": 0.03, "learning_rate": 5.949944939433377e-05, "loss": 3.7145, "step": 2500 }, { "epoch": 0.03, "learning_rate": 5.939933927320052e-05, "loss": 3.7008, "step": 3000 }, { "epoch": 0.04, "learning_rate": 5.929922915206727e-05, "loss": 3.6985, "step": 3500 }, { "epoch": 0.04, "learning_rate": 5.919911903093403e-05, "loss": 3.6897, "step": 4000 }, { "epoch": 0.05, "learning_rate": 5.9099008909800785e-05, "loss": 3.6934, "step": 4500 }, { "epoch": 0.05, "learning_rate": 5.8998898788667535e-05, "loss": 3.6838, "step": 5000 }, { "epoch": 0.06, "learning_rate": 5.889878866753429e-05, "loss": 3.6917, "step": 5500 }, { "epoch": 0.06, "learning_rate": 5.879867854640104e-05, "loss": 3.6839, "step": 6000 }, { "epoch": 0.07, "learning_rate": 5.869856842526779e-05, "loss": 3.6839, "step": 6500 }, { "epoch": 0.07, "learning_rate": 5.8598458304134556e-05, "loss": 3.6904, "step": 7000 }, { "epoch": 0.08, "learning_rate": 5.8498348183001306e-05, "loss": 3.687, "step": 7500 }, { "epoch": 0.08, "learning_rate": 5.8398238061868056e-05, "loss": 3.6835, "step": 8000 }, { "epoch": 0.09, "learning_rate": 5.8298127940734806e-05, "loss": 3.6846, "step": 8500 }, { "epoch": 0.09, "learning_rate": 5.819801781960156e-05, "loss": 3.6819, "step": 9000 }, { "epoch": 0.1, "learning_rate": 5.809790769846831e-05, "loss": 3.6802, "step": 9500 }, { "epoch": 0.1, "learning_rate": 5.799779757733507e-05, "loss": 3.6818, "step": 10000 }, { "epoch": 0.11, "learning_rate": 5.7897687456201826e-05, "loss": 3.6936, "step": 10500 }, { "epoch": 0.11, "learning_rate": 5.7797577335068576e-05, "loss": 3.6757, "step": 11000 }, { "epoch": 0.12, "learning_rate": 5.7697467213935326e-05, "loss": 3.6784, "step": 11500 }, { "epoch": 0.12, "learning_rate": 5.759735709280208e-05, "loss": 3.6812, "step": 12000 }, { "epoch": 0.13, "learning_rate": 5.749724697166884e-05, "loss": 3.6756, "step": 12500 }, { "epoch": 0.13, "learning_rate": 5.739713685053559e-05, "loss": 3.6803, "step": 13000 }, { "epoch": 0.14, "learning_rate": 5.7297026729402346e-05, "loss": 3.6851, "step": 13500 }, { "epoch": 0.14, "learning_rate": 5.7196916608269096e-05, "loss": 3.668, "step": 14000 }, { "epoch": 0.15, "learning_rate": 5.7096806487135846e-05, "loss": 3.6732, "step": 14500 }, { "epoch": 0.15, "learning_rate": 5.699669636600261e-05, "loss": 3.6791, "step": 15000 }, { "epoch": 0.16, "learning_rate": 5.689658624486936e-05, "loss": 3.6733, "step": 15500 }, { "epoch": 0.16, "learning_rate": 5.679647612373611e-05, "loss": 3.6656, "step": 16000 }, { "epoch": 0.17, "learning_rate": 5.6696366002602866e-05, "loss": 3.67, "step": 16500 }, { "epoch": 0.17, "learning_rate": 5.6596255881469616e-05, "loss": 3.6716, "step": 17000 }, { "epoch": 0.18, "learning_rate": 5.6496145760336366e-05, "loss": 3.671, "step": 17500 }, { "epoch": 0.18, "learning_rate": 5.639603563920313e-05, "loss": 3.6723, "step": 18000 }, { "epoch": 0.19, "learning_rate": 5.629592551806988e-05, "loss": 3.6755, "step": 18500 }, { "epoch": 0.19, "learning_rate": 5.619581539693663e-05, "loss": 3.6694, "step": 19000 }, { "epoch": 0.2, "learning_rate": 5.6095705275803387e-05, "loss": 3.6714, "step": 19500 }, { "epoch": 0.2, "learning_rate": 5.5995595154670137e-05, "loss": 3.6688, "step": 20000 }, { "epoch": 0.21, "learning_rate": 5.589548503353689e-05, "loss": 3.6638, "step": 20500 }, { "epoch": 0.21, "learning_rate": 5.579537491240365e-05, "loss": 3.6639, "step": 21000 }, { "epoch": 0.22, "learning_rate": 5.56952647912704e-05, "loss": 3.6567, "step": 21500 }, { "epoch": 0.22, "learning_rate": 5.559515467013715e-05, "loss": 3.6708, "step": 22000 }, { "epoch": 0.23, "learning_rate": 5.549504454900391e-05, "loss": 3.6601, "step": 22500 }, { "epoch": 0.23, "learning_rate": 5.539493442787066e-05, "loss": 3.6652, "step": 23000 }, { "epoch": 0.24, "learning_rate": 5.5294824306737414e-05, "loss": 3.6653, "step": 23500 }, { "epoch": 0.24, "learning_rate": 5.519471418560417e-05, "loss": 3.6492, "step": 24000 }, { "epoch": 0.25, "learning_rate": 5.509460406447092e-05, "loss": 3.6575, "step": 24500 }, { "epoch": 0.25, "learning_rate": 5.499449394333767e-05, "loss": 3.656, "step": 25000 }, { "epoch": 0.26, "learning_rate": 5.489438382220442e-05, "loss": 3.6545, "step": 25500 }, { "epoch": 0.26, "learning_rate": 5.4794273701071184e-05, "loss": 3.6486, "step": 26000 }, { "epoch": 0.27, "learning_rate": 5.4694163579937934e-05, "loss": 3.6575, "step": 26500 }, { "epoch": 0.27, "learning_rate": 5.4594053458804684e-05, "loss": 3.6579, "step": 27000 }, { "epoch": 0.28, "learning_rate": 5.449394333767144e-05, "loss": 3.6551, "step": 27500 }, { "epoch": 0.28, "learning_rate": 5.439383321653819e-05, "loss": 3.6531, "step": 28000 }, { "epoch": 0.29, "learning_rate": 5.429372309540494e-05, "loss": 3.654, "step": 28500 }, { "epoch": 0.29, "learning_rate": 5.4193612974271704e-05, "loss": 3.6438, "step": 29000 }, { "epoch": 0.3, "learning_rate": 5.4093502853138454e-05, "loss": 3.6519, "step": 29500 }, { "epoch": 0.3, "learning_rate": 5.3993392732005204e-05, "loss": 3.6542, "step": 30000 }, { "epoch": 0.31, "learning_rate": 5.389328261087196e-05, "loss": 3.6478, "step": 30500 }, { "epoch": 0.31, "learning_rate": 5.379317248973871e-05, "loss": 3.6613, "step": 31000 }, { "epoch": 0.32, "learning_rate": 5.369306236860547e-05, "loss": 3.6529, "step": 31500 }, { "epoch": 0.32, "learning_rate": 5.3592952247472224e-05, "loss": 3.6515, "step": 32000 }, { "epoch": 0.33, "learning_rate": 5.3492842126338974e-05, "loss": 3.6509, "step": 32500 }, { "epoch": 0.33, "learning_rate": 5.3392732005205724e-05, "loss": 3.66, "step": 33000 }, { "epoch": 0.34, "learning_rate": 5.329262188407248e-05, "loss": 3.6488, "step": 33500 }, { "epoch": 0.34, "learning_rate": 5.319251176293924e-05, "loss": 3.6559, "step": 34000 }, { "epoch": 0.35, "learning_rate": 5.309240164180599e-05, "loss": 3.6479, "step": 34500 }, { "epoch": 0.35, "learning_rate": 5.2992291520672745e-05, "loss": 3.6433, "step": 35000 }, { "epoch": 0.36, "learning_rate": 5.2892181399539495e-05, "loss": 3.6491, "step": 35500 }, { "epoch": 0.36, "learning_rate": 5.2792071278406245e-05, "loss": 3.655, "step": 36000 }, { "epoch": 0.37, "learning_rate": 5.2691961157273e-05, "loss": 3.6439, "step": 36500 }, { "epoch": 0.37, "learning_rate": 5.259185103613976e-05, "loss": 3.6434, "step": 37000 }, { "epoch": 0.38, "learning_rate": 5.249174091500651e-05, "loss": 3.6409, "step": 37500 }, { "epoch": 0.38, "learning_rate": 5.2391630793873265e-05, "loss": 3.6415, "step": 38000 }, { "epoch": 0.39, "learning_rate": 5.2291520672740015e-05, "loss": 3.6454, "step": 38500 }, { "epoch": 0.39, "learning_rate": 5.2191410551606765e-05, "loss": 3.646, "step": 39000 }, { "epoch": 0.4, "learning_rate": 5.209130043047353e-05, "loss": 3.6498, "step": 39500 }, { "epoch": 0.4, "learning_rate": 5.199119030934028e-05, "loss": 3.6407, "step": 40000 }, { "epoch": 0.41, "learning_rate": 5.189108018820703e-05, "loss": 3.6425, "step": 40500 }, { "epoch": 0.41, "learning_rate": 5.1790970067073785e-05, "loss": 3.6435, "step": 41000 }, { "epoch": 0.42, "learning_rate": 5.1690859945940535e-05, "loss": 3.6485, "step": 41500 }, { "epoch": 0.42, "learning_rate": 5.1590749824807285e-05, "loss": 3.6401, "step": 42000 }, { "epoch": 0.43, "learning_rate": 5.149063970367404e-05, "loss": 3.6369, "step": 42500 }, { "epoch": 0.43, "learning_rate": 5.13905295825408e-05, "loss": 3.634, "step": 43000 }, { "epoch": 0.44, "learning_rate": 5.129041946140755e-05, "loss": 3.6396, "step": 43500 }, { "epoch": 0.44, "learning_rate": 5.11903093402743e-05, "loss": 3.6362, "step": 44000 }, { "epoch": 0.45, "learning_rate": 5.1090199219141055e-05, "loss": 3.6328, "step": 44500 }, { "epoch": 0.45, "learning_rate": 5.099008909800781e-05, "loss": 3.6389, "step": 45000 }, { "epoch": 0.46, "learning_rate": 5.088997897687456e-05, "loss": 3.6455, "step": 45500 }, { "epoch": 0.46, "learning_rate": 5.078986885574132e-05, "loss": 3.6348, "step": 46000 }, { "epoch": 0.47, "learning_rate": 5.068975873460807e-05, "loss": 3.6386, "step": 46500 }, { "epoch": 0.47, "learning_rate": 5.058964861347482e-05, "loss": 3.6311, "step": 47000 }, { "epoch": 0.48, "learning_rate": 5.0489538492341576e-05, "loss": 3.6348, "step": 47500 }, { "epoch": 0.48, "learning_rate": 5.038942837120833e-05, "loss": 3.6394, "step": 48000 }, { "epoch": 0.49, "learning_rate": 5.028931825007508e-05, "loss": 3.6309, "step": 48500 }, { "epoch": 0.49, "learning_rate": 5.018920812894184e-05, "loss": 3.6308, "step": 49000 }, { "epoch": 0.5, "learning_rate": 5.008909800780859e-05, "loss": 3.6285, "step": 49500 }, { "epoch": 0.5, "learning_rate": 4.998898788667534e-05, "loss": 3.6377, "step": 50000 }, { "epoch": 0.51, "learning_rate": 4.98888777655421e-05, "loss": 3.6318, "step": 50500 }, { "epoch": 0.51, "learning_rate": 4.978876764440885e-05, "loss": 3.6303, "step": 51000 }, { "epoch": 0.52, "learning_rate": 4.96886575232756e-05, "loss": 3.6279, "step": 51500 }, { "epoch": 0.52, "learning_rate": 4.958854740214236e-05, "loss": 3.6245, "step": 52000 }, { "epoch": 0.53, "learning_rate": 4.948843728100911e-05, "loss": 3.6232, "step": 52500 }, { "epoch": 0.53, "learning_rate": 4.9388327159875866e-05, "loss": 3.6305, "step": 53000 }, { "epoch": 0.54, "learning_rate": 4.928821703874262e-05, "loss": 3.6197, "step": 53500 }, { "epoch": 0.54, "learning_rate": 4.918810691760937e-05, "loss": 3.6225, "step": 54000 }, { "epoch": 0.55, "learning_rate": 4.908799679647612e-05, "loss": 3.6248, "step": 54500 }, { "epoch": 0.55, "learning_rate": 4.898788667534288e-05, "loss": 3.6244, "step": 55000 }, { "epoch": 0.56, "learning_rate": 4.888777655420963e-05, "loss": 3.6323, "step": 55500 }, { "epoch": 0.56, "learning_rate": 4.8787666433076386e-05, "loss": 3.6215, "step": 56000 }, { "epoch": 0.57, "learning_rate": 4.868755631194314e-05, "loss": 3.6286, "step": 56500 }, { "epoch": 0.57, "learning_rate": 4.858744619080989e-05, "loss": 3.6305, "step": 57000 }, { "epoch": 0.58, "learning_rate": 4.848733606967664e-05, "loss": 3.6207, "step": 57500 }, { "epoch": 0.58, "learning_rate": 4.838722594854339e-05, "loss": 3.6255, "step": 58000 }, { "epoch": 0.59, "learning_rate": 4.828711582741016e-05, "loss": 3.6254, "step": 58500 }, { "epoch": 0.59, "learning_rate": 4.818700570627691e-05, "loss": 3.6274, "step": 59000 }, { "epoch": 0.6, "learning_rate": 4.808689558514366e-05, "loss": 3.6276, "step": 59500 }, { "epoch": 0.6, "learning_rate": 4.7986785464010413e-05, "loss": 3.6242, "step": 60000 }, { "epoch": 0.61, "learning_rate": 4.7886675342877163e-05, "loss": 3.6208, "step": 60500 }, { "epoch": 0.61, "learning_rate": 4.7786565221743913e-05, "loss": 3.6184, "step": 61000 }, { "epoch": 0.62, "learning_rate": 4.768645510061068e-05, "loss": 3.6234, "step": 61500 }, { "epoch": 0.62, "learning_rate": 4.758634497947743e-05, "loss": 3.6217, "step": 62000 }, { "epoch": 0.63, "learning_rate": 4.748623485834418e-05, "loss": 3.6199, "step": 62500 }, { "epoch": 0.63, "learning_rate": 4.7386124737210934e-05, "loss": 3.6157, "step": 63000 }, { "epoch": 0.64, "learning_rate": 4.7286014616077684e-05, "loss": 3.6195, "step": 63500 }, { "epoch": 0.64, "learning_rate": 4.718590449494444e-05, "loss": 3.6082, "step": 64000 }, { "epoch": 0.65, "learning_rate": 4.70857943738112e-05, "loss": 3.6117, "step": 64500 }, { "epoch": 0.65, "learning_rate": 4.698568425267795e-05, "loss": 3.6205, "step": 65000 }, { "epoch": 0.66, "learning_rate": 4.68855741315447e-05, "loss": 3.6128, "step": 65500 }, { "epoch": 0.66, "learning_rate": 4.6785464010411454e-05, "loss": 3.6186, "step": 66000 }, { "epoch": 0.67, "learning_rate": 4.6685353889278204e-05, "loss": 3.6168, "step": 66500 }, { "epoch": 0.67, "learning_rate": 4.658524376814496e-05, "loss": 3.6224, "step": 67000 }, { "epoch": 0.68, "learning_rate": 4.648513364701172e-05, "loss": 3.6112, "step": 67500 }, { "epoch": 0.68, "learning_rate": 4.638502352587847e-05, "loss": 3.6144, "step": 68000 }, { "epoch": 0.69, "learning_rate": 4.628491340474522e-05, "loss": 3.6178, "step": 68500 }, { "epoch": 0.69, "learning_rate": 4.6184803283611974e-05, "loss": 3.606, "step": 69000 }, { "epoch": 0.7, "learning_rate": 4.608469316247873e-05, "loss": 3.6104, "step": 69500 }, { "epoch": 0.7, "learning_rate": 4.598458304134548e-05, "loss": 3.6055, "step": 70000 }, { "epoch": 0.71, "learning_rate": 4.588447292021224e-05, "loss": 3.6067, "step": 70500 }, { "epoch": 0.71, "learning_rate": 4.578436279907899e-05, "loss": 3.6141, "step": 71000 }, { "epoch": 0.72, "learning_rate": 4.568425267794574e-05, "loss": 3.6147, "step": 71500 }, { "epoch": 0.72, "learning_rate": 4.55841425568125e-05, "loss": 3.6193, "step": 72000 }, { "epoch": 0.73, "learning_rate": 4.548403243567925e-05, "loss": 3.6088, "step": 72500 }, { "epoch": 0.73, "learning_rate": 4.5383922314546e-05, "loss": 3.6106, "step": 73000 }, { "epoch": 0.74, "learning_rate": 4.528381219341276e-05, "loss": 3.6058, "step": 73500 }, { "epoch": 0.74, "learning_rate": 4.518370207227951e-05, "loss": 3.615, "step": 74000 }, { "epoch": 0.75, "learning_rate": 4.508359195114626e-05, "loss": 3.6123, "step": 74500 }, { "epoch": 0.75, "learning_rate": 4.4983481830013015e-05, "loss": 3.6059, "step": 75000 }, { "epoch": 0.76, "learning_rate": 4.488337170887977e-05, "loss": 3.602, "step": 75500 }, { "epoch": 0.76, "learning_rate": 4.478326158774652e-05, "loss": 3.605, "step": 76000 }, { "epoch": 0.77, "learning_rate": 4.468315146661327e-05, "loss": 3.6099, "step": 76500 }, { "epoch": 0.77, "learning_rate": 4.458304134548003e-05, "loss": 3.602, "step": 77000 }, { "epoch": 0.78, "learning_rate": 4.4482931224346785e-05, "loss": 3.6058, "step": 77500 }, { "epoch": 0.78, "learning_rate": 4.4382821103213535e-05, "loss": 3.6044, "step": 78000 }, { "epoch": 0.79, "learning_rate": 4.428271098208029e-05, "loss": 3.6072, "step": 78500 }, { "epoch": 0.79, "learning_rate": 4.418260086094704e-05, "loss": 3.6015, "step": 79000 }, { "epoch": 0.8, "learning_rate": 4.408249073981379e-05, "loss": 3.6019, "step": 79500 }, { "epoch": 0.8, "learning_rate": 4.398238061868055e-05, "loss": 3.6099, "step": 80000 }, { "epoch": 0.81, "learning_rate": 4.3882270497547305e-05, "loss": 3.6005, "step": 80500 }, { "epoch": 0.81, "learning_rate": 4.3782160376414055e-05, "loss": 3.609, "step": 81000 }, { "epoch": 0.82, "learning_rate": 4.368205025528081e-05, "loss": 3.601, "step": 81500 }, { "epoch": 0.82, "learning_rate": 4.358194013414756e-05, "loss": 3.6013, "step": 82000 }, { "epoch": 0.83, "learning_rate": 4.348183001301431e-05, "loss": 3.6023, "step": 82500 }, { "epoch": 0.83, "learning_rate": 4.3381719891881076e-05, "loss": 3.6047, "step": 83000 }, { "epoch": 0.84, "learning_rate": 4.3281609770747826e-05, "loss": 3.5886, "step": 83500 }, { "epoch": 0.84, "learning_rate": 4.3181499649614575e-05, "loss": 3.5942, "step": 84000 }, { "epoch": 0.85, "learning_rate": 4.308138952848133e-05, "loss": 3.5961, "step": 84500 }, { "epoch": 0.85, "learning_rate": 4.298127940734808e-05, "loss": 3.6019, "step": 85000 }, { "epoch": 0.86, "learning_rate": 4.288116928621483e-05, "loss": 3.6043, "step": 85500 }, { "epoch": 0.86, "learning_rate": 4.2781059165081596e-05, "loss": 3.5922, "step": 86000 }, { "epoch": 0.87, "learning_rate": 4.2680949043948346e-05, "loss": 3.5973, "step": 86500 }, { "epoch": 0.87, "learning_rate": 4.2580838922815096e-05, "loss": 3.603, "step": 87000 }, { "epoch": 0.88, "learning_rate": 4.248072880168185e-05, "loss": 3.6052, "step": 87500 }, { "epoch": 0.88, "learning_rate": 4.23806186805486e-05, "loss": 3.5951, "step": 88000 }, { "epoch": 0.89, "learning_rate": 4.228050855941536e-05, "loss": 3.5904, "step": 88500 }, { "epoch": 0.89, "learning_rate": 4.2180398438282116e-05, "loss": 3.5861, "step": 89000 }, { "epoch": 0.9, "learning_rate": 4.2080288317148866e-05, "loss": 3.6006, "step": 89500 }, { "epoch": 0.9, "learning_rate": 4.1980178196015616e-05, "loss": 3.5911, "step": 90000 }, { "epoch": 0.91, "learning_rate": 4.188006807488237e-05, "loss": 3.5982, "step": 90500 }, { "epoch": 0.91, "learning_rate": 4.177995795374913e-05, "loss": 3.5939, "step": 91000 }, { "epoch": 0.92, "learning_rate": 4.167984783261588e-05, "loss": 3.6002, "step": 91500 }, { "epoch": 0.92, "learning_rate": 4.157973771148263e-05, "loss": 3.5975, "step": 92000 }, { "epoch": 0.93, "learning_rate": 4.1479627590349386e-05, "loss": 3.5909, "step": 92500 }, { "epoch": 0.93, "learning_rate": 4.1379517469216136e-05, "loss": 3.5986, "step": 93000 }, { "epoch": 0.94, "learning_rate": 4.1279407348082886e-05, "loss": 3.589, "step": 93500 }, { "epoch": 0.94, "learning_rate": 4.117929722694965e-05, "loss": 3.5922, "step": 94000 }, { "epoch": 0.95, "learning_rate": 4.10791871058164e-05, "loss": 3.5921, "step": 94500 }, { "epoch": 0.95, "learning_rate": 4.097907698468315e-05, "loss": 3.5971, "step": 95000 }, { "epoch": 0.96, "learning_rate": 4.0878966863549907e-05, "loss": 3.5914, "step": 95500 }, { "epoch": 0.96, "learning_rate": 4.0778856742416657e-05, "loss": 3.5933, "step": 96000 }, { "epoch": 0.97, "learning_rate": 4.067874662128341e-05, "loss": 3.5823, "step": 96500 }, { "epoch": 0.97, "learning_rate": 4.057863650015017e-05, "loss": 3.5955, "step": 97000 }, { "epoch": 0.98, "learning_rate": 4.047852637901692e-05, "loss": 3.587, "step": 97500 }, { "epoch": 0.98, "learning_rate": 4.037841625788367e-05, "loss": 3.5958, "step": 98000 }, { "epoch": 0.99, "learning_rate": 4.027830613675043e-05, "loss": 3.5866, "step": 98500 }, { "epoch": 0.99, "learning_rate": 4.017819601561718e-05, "loss": 3.5848, "step": 99000 }, { "epoch": 1.0, "learning_rate": 4.0078085894483934e-05, "loss": 3.5803, "step": 99500 }, { "epoch": 1.0, "learning_rate": 3.997797577335069e-05, "loss": 3.5811, "step": 100000 }, { "epoch": 1.01, "learning_rate": 3.987786565221744e-05, "loss": 3.5407, "step": 100500 }, { "epoch": 1.01, "learning_rate": 3.977775553108419e-05, "loss": 3.5453, "step": 101000 }, { "epoch": 1.02, "learning_rate": 3.967764540995095e-05, "loss": 3.5439, "step": 101500 }, { "epoch": 1.02, "learning_rate": 3.9577535288817704e-05, "loss": 3.5486, "step": 102000 }, { "epoch": 1.03, "learning_rate": 3.9477425167684454e-05, "loss": 3.5476, "step": 102500 }, { "epoch": 1.03, "learning_rate": 3.937731504655121e-05, "loss": 3.548, "step": 103000 }, { "epoch": 1.04, "learning_rate": 3.927720492541796e-05, "loss": 3.5348, "step": 103500 }, { "epoch": 1.04, "learning_rate": 3.917709480428471e-05, "loss": 3.5374, "step": 104000 }, { "epoch": 1.05, "learning_rate": 3.907698468315147e-05, "loss": 3.5443, "step": 104500 }, { "epoch": 1.05, "learning_rate": 3.8976874562018224e-05, "loss": 3.547, "step": 105000 }, { "epoch": 1.06, "learning_rate": 3.8876764440884974e-05, "loss": 3.5464, "step": 105500 }, { "epoch": 1.06, "learning_rate": 3.877665431975173e-05, "loss": 3.5456, "step": 106000 }, { "epoch": 1.07, "learning_rate": 3.867654419861848e-05, "loss": 3.5402, "step": 106500 }, { "epoch": 1.07, "learning_rate": 3.857643407748523e-05, "loss": 3.5425, "step": 107000 }, { "epoch": 1.08, "learning_rate": 3.847632395635199e-05, "loss": 3.5461, "step": 107500 }, { "epoch": 1.08, "learning_rate": 3.8376213835218744e-05, "loss": 3.547, "step": 108000 }, { "epoch": 1.09, "learning_rate": 3.8276103714085494e-05, "loss": 3.547, "step": 108500 }, { "epoch": 1.09, "learning_rate": 3.8175993592952244e-05, "loss": 3.5478, "step": 109000 }, { "epoch": 1.1, "learning_rate": 3.8075883471819e-05, "loss": 3.5439, "step": 109500 }, { "epoch": 1.1, "learning_rate": 3.797577335068576e-05, "loss": 3.5428, "step": 110000 }, { "epoch": 1.11, "learning_rate": 3.787566322955251e-05, "loss": 3.5457, "step": 110500 }, { "epoch": 1.11, "learning_rate": 3.7775553108419265e-05, "loss": 3.5394, "step": 111000 }, { "epoch": 1.12, "learning_rate": 3.7675442987286015e-05, "loss": 3.5455, "step": 111500 }, { "epoch": 1.12, "learning_rate": 3.7575332866152765e-05, "loss": 3.5464, "step": 112000 }, { "epoch": 1.13, "learning_rate": 3.747522274501952e-05, "loss": 3.5397, "step": 112500 }, { "epoch": 1.13, "learning_rate": 3.737511262388628e-05, "loss": 3.5395, "step": 113000 }, { "epoch": 1.14, "learning_rate": 3.727500250275303e-05, "loss": 3.5362, "step": 113500 }, { "epoch": 1.14, "learning_rate": 3.7174892381619785e-05, "loss": 3.5442, "step": 114000 }, { "epoch": 1.15, "learning_rate": 3.7074782260486535e-05, "loss": 3.5365, "step": 114500 }, { "epoch": 1.15, "learning_rate": 3.6974672139353285e-05, "loss": 3.5408, "step": 115000 }, { "epoch": 1.16, "learning_rate": 3.687456201822005e-05, "loss": 3.5402, "step": 115500 }, { "epoch": 1.16, "learning_rate": 3.67744518970868e-05, "loss": 3.5353, "step": 116000 }, { "epoch": 1.17, "learning_rate": 3.667434177595355e-05, "loss": 3.5403, "step": 116500 }, { "epoch": 1.17, "learning_rate": 3.6574231654820305e-05, "loss": 3.5403, "step": 117000 }, { "epoch": 1.18, "learning_rate": 3.6474121533687055e-05, "loss": 3.5445, "step": 117500 }, { "epoch": 1.18, "learning_rate": 3.6374011412553805e-05, "loss": 3.5895, "step": 118000 }, { "epoch": 1.19, "learning_rate": 3.627390129142057e-05, "loss": 3.5945, "step": 118500 }, { "epoch": 1.19, "learning_rate": 3.617379117028732e-05, "loss": 3.5952, "step": 119000 }, { "epoch": 1.2, "learning_rate": 3.607368104915407e-05, "loss": 3.5922, "step": 119500 }, { "epoch": 1.2, "learning_rate": 3.5973570928020825e-05, "loss": 3.5886, "step": 120000 }, { "epoch": 1.21, "learning_rate": 3.5873460806887575e-05, "loss": 3.5939, "step": 120500 }, { "epoch": 1.21, "learning_rate": 3.577335068575433e-05, "loss": 3.5882, "step": 121000 }, { "epoch": 1.22, "learning_rate": 3.567324056462109e-05, "loss": 3.5827, "step": 121500 }, { "epoch": 1.22, "learning_rate": 3.557313044348784e-05, "loss": 3.5883, "step": 122000 }, { "epoch": 1.23, "learning_rate": 3.547302032235459e-05, "loss": 3.5938, "step": 122500 }, { "epoch": 1.23, "learning_rate": 3.5372910201221346e-05, "loss": 3.5888, "step": 123000 }, { "epoch": 1.24, "learning_rate": 3.5272800080088096e-05, "loss": 3.5887, "step": 123500 }, { "epoch": 1.24, "learning_rate": 3.517268995895485e-05, "loss": 3.5872, "step": 124000 }, { "epoch": 1.25, "learning_rate": 3.50725798378216e-05, "loss": 3.5805, "step": 124500 }, { "epoch": 1.25, "learning_rate": 3.497246971668836e-05, "loss": 3.5884, "step": 125000 }, { "epoch": 1.26, "learning_rate": 3.487235959555511e-05, "loss": 3.59, "step": 125500 }, { "epoch": 1.26, "learning_rate": 3.477224947442186e-05, "loss": 3.5949, "step": 126000 }, { "epoch": 1.27, "learning_rate": 3.467213935328862e-05, "loss": 3.5853, "step": 126500 }, { "epoch": 1.27, "learning_rate": 3.457202923215537e-05, "loss": 3.5865, "step": 127000 }, { "epoch": 1.28, "learning_rate": 3.447191911102212e-05, "loss": 3.5931, "step": 127500 }, { "epoch": 1.28, "learning_rate": 3.437180898988888e-05, "loss": 3.5939, "step": 128000 }, { "epoch": 1.29, "learning_rate": 3.427169886875563e-05, "loss": 3.5869, "step": 128500 }, { "epoch": 1.29, "learning_rate": 3.4171588747622386e-05, "loss": 3.5897, "step": 129000 }, { "epoch": 1.3, "learning_rate": 3.407147862648914e-05, "loss": 3.586, "step": 129500 }, { "epoch": 1.3, "learning_rate": 3.397136850535589e-05, "loss": 3.5922, "step": 130000 }, { "epoch": 1.31, "learning_rate": 3.387125838422264e-05, "loss": 3.5907, "step": 130500 }, { "epoch": 1.31, "learning_rate": 3.37711482630894e-05, "loss": 3.5943, "step": 131000 }, { "epoch": 1.32, "learning_rate": 3.367103814195615e-05, "loss": 3.584, "step": 131500 }, { "epoch": 1.32, "learning_rate": 3.3570928020822906e-05, "loss": 3.5856, "step": 132000 }, { "epoch": 1.33, "learning_rate": 3.347081789968966e-05, "loss": 3.5853, "step": 132500 }, { "epoch": 1.33, "learning_rate": 3.337070777855641e-05, "loss": 3.5876, "step": 133000 }, { "epoch": 1.34, "learning_rate": 3.327059765742316e-05, "loss": 3.5933, "step": 133500 }, { "epoch": 1.34, "learning_rate": 3.317048753628992e-05, "loss": 3.583, "step": 134000 }, { "epoch": 1.35, "learning_rate": 3.307037741515668e-05, "loss": 3.5854, "step": 134500 }, { "epoch": 1.35, "learning_rate": 3.297026729402343e-05, "loss": 3.5859, "step": 135000 }, { "epoch": 1.36, "learning_rate": 3.2870157172890183e-05, "loss": 3.5905, "step": 135500 }, { "epoch": 1.36, "learning_rate": 3.277004705175693e-05, "loss": 3.5863, "step": 136000 }, { "epoch": 1.37, "learning_rate": 3.266993693062368e-05, "loss": 3.5836, "step": 136500 }, { "epoch": 1.37, "learning_rate": 3.256982680949044e-05, "loss": 3.5912, "step": 137000 }, { "epoch": 1.38, "learning_rate": 3.24697166883572e-05, "loss": 3.5823, "step": 137500 }, { "epoch": 1.38, "learning_rate": 3.236960656722395e-05, "loss": 3.581, "step": 138000 }, { "epoch": 1.39, "learning_rate": 3.2269496446090704e-05, "loss": 3.5875, "step": 138500 }, { "epoch": 1.39, "learning_rate": 3.2169386324957454e-05, "loss": 3.5897, "step": 139000 }, { "epoch": 1.4, "learning_rate": 3.2069276203824204e-05, "loss": 3.5845, "step": 139500 }, { "epoch": 1.4, "learning_rate": 3.196916608269096e-05, "loss": 3.5937, "step": 140000 }, { "epoch": 1.41, "learning_rate": 3.186905596155772e-05, "loss": 3.5862, "step": 140500 }, { "epoch": 1.41, "learning_rate": 3.176894584042447e-05, "loss": 3.5894, "step": 141000 }, { "epoch": 1.42, "learning_rate": 3.166883571929122e-05, "loss": 3.5844, "step": 141500 }, { "epoch": 1.42, "learning_rate": 3.1568725598157974e-05, "loss": 3.5877, "step": 142000 }, { "epoch": 1.43, "learning_rate": 3.1468615477024724e-05, "loss": 3.5814, "step": 142500 }, { "epoch": 1.43, "learning_rate": 3.136850535589148e-05, "loss": 3.5898, "step": 143000 }, { "epoch": 1.44, "learning_rate": 3.126839523475824e-05, "loss": 3.5867, "step": 143500 }, { "epoch": 1.44, "learning_rate": 3.116828511362499e-05, "loss": 3.5788, "step": 144000 }, { "epoch": 1.45, "learning_rate": 3.106817499249174e-05, "loss": 3.5853, "step": 144500 }, { "epoch": 1.45, "learning_rate": 3.0968064871358494e-05, "loss": 3.5797, "step": 145000 }, { "epoch": 1.46, "learning_rate": 3.086795475022525e-05, "loss": 3.5808, "step": 145500 }, { "epoch": 1.46, "learning_rate": 3.0767844629092e-05, "loss": 3.5905, "step": 146000 }, { "epoch": 1.47, "learning_rate": 3.066773450795876e-05, "loss": 3.5829, "step": 146500 }, { "epoch": 1.47, "learning_rate": 3.056762438682551e-05, "loss": 3.5814, "step": 147000 }, { "epoch": 1.48, "learning_rate": 3.046751426569226e-05, "loss": 3.5844, "step": 147500 }, { "epoch": 1.48, "learning_rate": 3.0367404144559018e-05, "loss": 3.5842, "step": 148000 }, { "epoch": 1.49, "learning_rate": 3.026729402342577e-05, "loss": 3.5841, "step": 148500 }, { "epoch": 1.49, "learning_rate": 3.0167183902292525e-05, "loss": 3.5872, "step": 149000 }, { "epoch": 1.5, "learning_rate": 3.0067073781159275e-05, "loss": 3.5906, "step": 149500 }, { "epoch": 1.5, "learning_rate": 2.996696366002603e-05, "loss": 3.5784, "step": 150000 }, { "epoch": 1.51, "learning_rate": 2.9866853538892785e-05, "loss": 3.5907, "step": 150500 }, { "epoch": 1.51, "learning_rate": 2.9766743417759535e-05, "loss": 3.5839, "step": 151000 }, { "epoch": 1.52, "learning_rate": 2.966663329662629e-05, "loss": 3.5824, "step": 151500 }, { "epoch": 1.52, "learning_rate": 2.9566523175493045e-05, "loss": 3.5771, "step": 152000 }, { "epoch": 1.53, "learning_rate": 2.9466413054359795e-05, "loss": 3.5827, "step": 152500 }, { "epoch": 1.53, "learning_rate": 2.936630293322655e-05, "loss": 3.5817, "step": 153000 }, { "epoch": 1.54, "learning_rate": 2.92661928120933e-05, "loss": 3.5869, "step": 153500 }, { "epoch": 1.54, "learning_rate": 2.9166082690960055e-05, "loss": 3.5867, "step": 154000 }, { "epoch": 1.55, "learning_rate": 2.906597256982681e-05, "loss": 3.5923, "step": 154500 }, { "epoch": 1.55, "learning_rate": 2.896586244869356e-05, "loss": 3.5823, "step": 155000 }, { "epoch": 1.56, "learning_rate": 2.886575232756032e-05, "loss": 3.5769, "step": 155500 }, { "epoch": 1.56, "learning_rate": 2.8765642206427072e-05, "loss": 3.579, "step": 156000 }, { "epoch": 1.57, "learning_rate": 2.8665532085293822e-05, "loss": 3.5777, "step": 156500 }, { "epoch": 1.57, "learning_rate": 2.856542196416058e-05, "loss": 3.5832, "step": 157000 }, { "epoch": 1.58, "learning_rate": 2.8465311843027332e-05, "loss": 3.5799, "step": 157500 }, { "epoch": 1.58, "learning_rate": 2.8365201721894082e-05, "loss": 3.5748, "step": 158000 }, { "epoch": 1.59, "learning_rate": 2.826509160076084e-05, "loss": 3.5888, "step": 158500 }, { "epoch": 1.59, "learning_rate": 2.8164981479627592e-05, "loss": 3.5878, "step": 159000 }, { "epoch": 1.6, "learning_rate": 2.8064871358494342e-05, "loss": 3.5815, "step": 159500 }, { "epoch": 1.6, "learning_rate": 2.79647612373611e-05, "loss": 3.5785, "step": 160000 }, { "epoch": 1.61, "learning_rate": 2.7864651116227852e-05, "loss": 3.5865, "step": 160500 }, { "epoch": 1.61, "learning_rate": 2.7764540995094606e-05, "loss": 3.5718, "step": 161000 }, { "epoch": 1.62, "learning_rate": 2.766443087396136e-05, "loss": 3.5854, "step": 161500 }, { "epoch": 1.62, "learning_rate": 2.756432075282811e-05, "loss": 3.5721, "step": 162000 }, { "epoch": 1.63, "learning_rate": 2.7464210631694866e-05, "loss": 3.5775, "step": 162500 }, { "epoch": 1.63, "learning_rate": 2.736410051056162e-05, "loss": 3.5874, "step": 163000 }, { "epoch": 1.64, "learning_rate": 2.726399038942837e-05, "loss": 3.5854, "step": 163500 }, { "epoch": 1.64, "learning_rate": 2.7163880268295126e-05, "loss": 3.582, "step": 164000 }, { "epoch": 1.65, "learning_rate": 2.706377014716188e-05, "loss": 3.5753, "step": 164500 }, { "epoch": 1.65, "learning_rate": 2.6963660026028633e-05, "loss": 3.5792, "step": 165000 }, { "epoch": 1.66, "learning_rate": 2.6863549904895386e-05, "loss": 3.5759, "step": 165500 }, { "epoch": 1.66, "learning_rate": 2.676343978376214e-05, "loss": 3.5819, "step": 166000 }, { "epoch": 1.67, "learning_rate": 2.6663329662628893e-05, "loss": 3.5744, "step": 166500 }, { "epoch": 1.67, "learning_rate": 2.6563219541495646e-05, "loss": 3.5814, "step": 167000 }, { "epoch": 1.68, "learning_rate": 2.64631094203624e-05, "loss": 3.579, "step": 167500 }, { "epoch": 1.68, "learning_rate": 2.6362999299229153e-05, "loss": 3.5872, "step": 168000 }, { "epoch": 1.69, "learning_rate": 2.6262889178095906e-05, "loss": 3.5764, "step": 168500 }, { "epoch": 1.69, "learning_rate": 2.616277905696266e-05, "loss": 3.576, "step": 169000 }, { "epoch": 1.7, "learning_rate": 2.6062668935829413e-05, "loss": 3.5744, "step": 169500 }, { "epoch": 1.7, "learning_rate": 2.5962558814696166e-05, "loss": 3.5742, "step": 170000 }, { "epoch": 1.71, "learning_rate": 2.586244869356292e-05, "loss": 3.5718, "step": 170500 }, { "epoch": 1.71, "learning_rate": 2.5762338572429673e-05, "loss": 3.5761, "step": 171000 }, { "epoch": 1.72, "learning_rate": 2.5662228451296426e-05, "loss": 3.5765, "step": 171500 }, { "epoch": 1.72, "learning_rate": 2.556211833016318e-05, "loss": 3.5783, "step": 172000 }, { "epoch": 1.73, "learning_rate": 2.5462008209029933e-05, "loss": 3.5767, "step": 172500 }, { "epoch": 1.73, "learning_rate": 2.5361898087896687e-05, "loss": 3.5697, "step": 173000 }, { "epoch": 1.74, "learning_rate": 2.526178796676344e-05, "loss": 3.57, "step": 173500 }, { "epoch": 1.74, "learning_rate": 2.5161677845630193e-05, "loss": 3.573, "step": 174000 }, { "epoch": 1.75, "learning_rate": 2.506156772449695e-05, "loss": 3.5741, "step": 174500 }, { "epoch": 1.75, "learning_rate": 2.49614576033637e-05, "loss": 3.5802, "step": 175000 }, { "epoch": 1.76, "learning_rate": 2.4861347482230453e-05, "loss": 3.5763, "step": 175500 }, { "epoch": 1.76, "learning_rate": 2.476123736109721e-05, "loss": 3.5697, "step": 176000 }, { "epoch": 1.77, "learning_rate": 2.466112723996396e-05, "loss": 3.5797, "step": 176500 }, { "epoch": 1.77, "learning_rate": 2.4561017118830714e-05, "loss": 3.5779, "step": 177000 }, { "epoch": 1.78, "learning_rate": 2.4460906997697467e-05, "loss": 3.5754, "step": 177500 }, { "epoch": 1.78, "learning_rate": 2.436079687656422e-05, "loss": 3.5808, "step": 178000 }, { "epoch": 1.79, "learning_rate": 2.4260686755430974e-05, "loss": 3.57, "step": 178500 }, { "epoch": 1.79, "learning_rate": 2.4160576634297727e-05, "loss": 3.5687, "step": 179000 }, { "epoch": 1.8, "learning_rate": 2.406046651316448e-05, "loss": 3.5777, "step": 179500 }, { "epoch": 1.8, "learning_rate": 2.3960356392031237e-05, "loss": 3.5793, "step": 180000 }, { "epoch": 1.81, "learning_rate": 2.3860246270897987e-05, "loss": 3.5826, "step": 180500 }, { "epoch": 1.81, "learning_rate": 2.376013614976474e-05, "loss": 3.5682, "step": 181000 }, { "epoch": 1.82, "learning_rate": 2.3660026028631497e-05, "loss": 3.5774, "step": 181500 }, { "epoch": 1.82, "learning_rate": 2.3559915907498247e-05, "loss": 3.5774, "step": 182000 }, { "epoch": 1.83, "learning_rate": 2.3459805786365e-05, "loss": 3.5682, "step": 182500 }, { "epoch": 1.83, "learning_rate": 2.3359695665231758e-05, "loss": 3.5741, "step": 183000 }, { "epoch": 1.84, "learning_rate": 2.3259585544098508e-05, "loss": 3.5718, "step": 183500 }, { "epoch": 1.84, "learning_rate": 2.3159475422965264e-05, "loss": 3.5704, "step": 184000 }, { "epoch": 1.85, "learning_rate": 2.3059365301832018e-05, "loss": 3.5711, "step": 184500 }, { "epoch": 1.85, "learning_rate": 2.2959255180698768e-05, "loss": 3.5759, "step": 185000 }, { "epoch": 1.86, "learning_rate": 2.2859145059565524e-05, "loss": 3.5717, "step": 185500 }, { "epoch": 1.86, "learning_rate": 2.2759034938432274e-05, "loss": 3.5752, "step": 186000 }, { "epoch": 1.87, "learning_rate": 2.2658924817299028e-05, "loss": 3.5771, "step": 186500 }, { "epoch": 1.87, "learning_rate": 2.2558814696165785e-05, "loss": 3.5646, "step": 187000 }, { "epoch": 1.88, "learning_rate": 2.2458704575032535e-05, "loss": 3.5694, "step": 187500 }, { "epoch": 1.88, "learning_rate": 2.2358594453899288e-05, "loss": 3.5752, "step": 188000 }, { "epoch": 1.89, "learning_rate": 2.2258484332766045e-05, "loss": 3.5732, "step": 188500 }, { "epoch": 1.89, "learning_rate": 2.2158374211632795e-05, "loss": 3.576, "step": 189000 }, { "epoch": 1.9, "learning_rate": 2.205826409049955e-05, "loss": 3.579, "step": 189500 }, { "epoch": 1.9, "learning_rate": 2.1958153969366305e-05, "loss": 3.577, "step": 190000 }, { "epoch": 1.91, "learning_rate": 2.1858043848233055e-05, "loss": 3.5761, "step": 190500 }, { "epoch": 1.91, "learning_rate": 2.175793372709981e-05, "loss": 3.5713, "step": 191000 }, { "epoch": 1.92, "learning_rate": 2.1657823605966565e-05, "loss": 3.567, "step": 191500 }, { "epoch": 1.92, "learning_rate": 2.1557713484833315e-05, "loss": 3.5702, "step": 192000 }, { "epoch": 1.93, "learning_rate": 2.145760336370007e-05, "loss": 3.5675, "step": 192500 }, { "epoch": 1.93, "learning_rate": 2.1357493242566825e-05, "loss": 3.5677, "step": 193000 }, { "epoch": 1.94, "learning_rate": 2.125738312143358e-05, "loss": 3.5756, "step": 193500 }, { "epoch": 1.94, "learning_rate": 2.1157273000300332e-05, "loss": 3.5742, "step": 194000 }, { "epoch": 1.95, "learning_rate": 2.1057162879167082e-05, "loss": 3.5783, "step": 194500 }, { "epoch": 1.95, "learning_rate": 2.095705275803384e-05, "loss": 3.5716, "step": 195000 }, { "epoch": 1.96, "learning_rate": 2.0856942636900592e-05, "loss": 3.5644, "step": 195500 }, { "epoch": 1.96, "learning_rate": 2.0756832515767342e-05, "loss": 3.5701, "step": 196000 }, { "epoch": 1.97, "learning_rate": 2.06567223946341e-05, "loss": 3.5744, "step": 196500 }, { "epoch": 1.97, "learning_rate": 2.0556612273500852e-05, "loss": 3.5715, "step": 197000 }, { "epoch": 1.98, "learning_rate": 2.0456502152367602e-05, "loss": 3.5673, "step": 197500 }, { "epoch": 1.98, "learning_rate": 2.035639203123436e-05, "loss": 3.5683, "step": 198000 }, { "epoch": 1.99, "learning_rate": 2.0256281910101112e-05, "loss": 3.5723, "step": 198500 }, { "epoch": 1.99, "learning_rate": 2.0156171788967866e-05, "loss": 3.5749, "step": 199000 }, { "epoch": 2.0, "learning_rate": 2.005606166783462e-05, "loss": 3.5611, "step": 199500 }, { "epoch": 2.0, "learning_rate": 1.9955951546701372e-05, "loss": 3.561, "step": 200000 }, { "epoch": 2.01, "learning_rate": 1.9855841425568126e-05, "loss": 3.5365, "step": 200500 }, { "epoch": 2.01, "learning_rate": 1.975573130443488e-05, "loss": 3.5517, "step": 201000 }, { "epoch": 2.02, "learning_rate": 1.9655621183301632e-05, "loss": 3.5408, "step": 201500 }, { "epoch": 2.02, "learning_rate": 1.9555511062168386e-05, "loss": 3.5401, "step": 202000 }, { "epoch": 2.03, "learning_rate": 1.945540094103514e-05, "loss": 3.5402, "step": 202500 }, { "epoch": 2.03, "learning_rate": 1.9355290819901893e-05, "loss": 3.5454, "step": 203000 }, { "epoch": 2.04, "learning_rate": 1.9255180698768646e-05, "loss": 3.537, "step": 203500 }, { "epoch": 2.04, "learning_rate": 1.91550705776354e-05, "loss": 3.5413, "step": 204000 }, { "epoch": 2.05, "learning_rate": 1.9054960456502153e-05, "loss": 3.543, "step": 204500 }, { "epoch": 2.05, "learning_rate": 1.8954850335368906e-05, "loss": 3.5367, "step": 205000 }, { "epoch": 2.06, "learning_rate": 1.885474021423566e-05, "loss": 3.5449, "step": 205500 }, { "epoch": 2.06, "learning_rate": 1.8754630093102413e-05, "loss": 3.536, "step": 206000 }, { "epoch": 2.07, "learning_rate": 1.8654519971969166e-05, "loss": 3.5429, "step": 206500 }, { "epoch": 2.07, "learning_rate": 1.855440985083592e-05, "loss": 3.5373, "step": 207000 }, { "epoch": 2.08, "learning_rate": 1.8454299729702673e-05, "loss": 3.5495, "step": 207500 }, { "epoch": 2.08, "learning_rate": 1.8354189608569426e-05, "loss": 3.5432, "step": 208000 }, { "epoch": 2.09, "learning_rate": 1.8254079487436183e-05, "loss": 3.5435, "step": 208500 }, { "epoch": 2.09, "learning_rate": 1.8153969366302933e-05, "loss": 3.545, "step": 209000 }, { "epoch": 2.1, "learning_rate": 1.8053859245169686e-05, "loss": 3.5331, "step": 209500 }, { "epoch": 2.1, "learning_rate": 1.7953749124036443e-05, "loss": 3.5315, "step": 210000 }, { "epoch": 2.11, "learning_rate": 1.7853639002903193e-05, "loss": 3.5333, "step": 210500 }, { "epoch": 2.11, "learning_rate": 1.7753528881769947e-05, "loss": 3.548, "step": 211000 }, { "epoch": 2.12, "learning_rate": 1.76534187606367e-05, "loss": 3.5485, "step": 211500 }, { "epoch": 2.12, "learning_rate": 1.7553308639503453e-05, "loss": 3.5378, "step": 212000 }, { "epoch": 2.13, "learning_rate": 1.745319851837021e-05, "loss": 3.5438, "step": 212500 }, { "epoch": 2.13, "learning_rate": 1.735308839723696e-05, "loss": 3.5346, "step": 213000 }, { "epoch": 2.14, "learning_rate": 1.7252978276103713e-05, "loss": 3.5406, "step": 213500 }, { "epoch": 2.14, "learning_rate": 1.715286815497047e-05, "loss": 3.5365, "step": 214000 }, { "epoch": 2.15, "learning_rate": 1.705275803383722e-05, "loss": 3.5403, "step": 214500 }, { "epoch": 2.15, "learning_rate": 1.6952647912703974e-05, "loss": 3.5449, "step": 215000 }, { "epoch": 2.16, "learning_rate": 1.685253779157073e-05, "loss": 3.5456, "step": 215500 }, { "epoch": 2.16, "learning_rate": 1.675242767043748e-05, "loss": 3.5391, "step": 216000 }, { "epoch": 2.17, "learning_rate": 1.6652317549304234e-05, "loss": 3.5437, "step": 216500 }, { "epoch": 2.17, "learning_rate": 1.655220742817099e-05, "loss": 3.5374, "step": 217000 }, { "epoch": 2.18, "learning_rate": 1.645209730703774e-05, "loss": 3.5483, "step": 217500 }, { "epoch": 2.18, "learning_rate": 1.6351987185904497e-05, "loss": 3.546, "step": 218000 }, { "epoch": 2.19, "learning_rate": 1.6251877064771247e-05, "loss": 3.5468, "step": 218500 }, { "epoch": 2.19, "learning_rate": 1.6151766943638e-05, "loss": 3.542, "step": 219000 }, { "epoch": 2.2, "learning_rate": 1.6051656822504757e-05, "loss": 3.5409, "step": 219500 }, { "epoch": 2.2, "learning_rate": 1.5951546701371507e-05, "loss": 3.5452, "step": 220000 }, { "epoch": 2.21, "learning_rate": 1.585143658023826e-05, "loss": 3.5398, "step": 220500 }, { "epoch": 2.21, "learning_rate": 1.5751326459105017e-05, "loss": 3.5423, "step": 221000 }, { "epoch": 2.22, "learning_rate": 1.5651216337971767e-05, "loss": 3.5423, "step": 221500 }, { "epoch": 2.22, "learning_rate": 1.5551106216838524e-05, "loss": 3.5529, "step": 222000 }, { "epoch": 2.23, "learning_rate": 1.5450996095705278e-05, "loss": 3.5467, "step": 222500 }, { "epoch": 2.23, "learning_rate": 1.5350885974572028e-05, "loss": 3.5428, "step": 223000 }, { "epoch": 2.24, "learning_rate": 1.5250775853438784e-05, "loss": 3.5497, "step": 223500 }, { "epoch": 2.24, "learning_rate": 1.5150665732305536e-05, "loss": 3.5413, "step": 224000 }, { "epoch": 2.25, "learning_rate": 1.505055561117229e-05, "loss": 3.5399, "step": 224500 }, { "epoch": 2.25, "learning_rate": 1.4950445490039045e-05, "loss": 3.5404, "step": 225000 }, { "epoch": 2.26, "learning_rate": 1.4850335368905796e-05, "loss": 3.5494, "step": 225500 }, { "epoch": 2.26, "learning_rate": 1.475022524777255e-05, "loss": 3.5332, "step": 226000 }, { "epoch": 2.27, "learning_rate": 1.4650115126639305e-05, "loss": 3.5436, "step": 226500 }, { "epoch": 2.27, "learning_rate": 1.4550005005506056e-05, "loss": 3.5414, "step": 227000 }, { "epoch": 2.28, "learning_rate": 1.444989488437281e-05, "loss": 3.5463, "step": 227500 }, { "epoch": 2.28, "learning_rate": 1.4349784763239565e-05, "loss": 3.5441, "step": 228000 }, { "epoch": 2.29, "learning_rate": 1.4249674642106318e-05, "loss": 3.5448, "step": 228500 }, { "epoch": 2.29, "learning_rate": 1.414956452097307e-05, "loss": 3.5414, "step": 229000 }, { "epoch": 2.3, "learning_rate": 1.4049454399839823e-05, "loss": 3.5428, "step": 229500 }, { "epoch": 2.3, "learning_rate": 1.3949344278706578e-05, "loss": 3.5371, "step": 230000 }, { "epoch": 2.31, "learning_rate": 1.3849234157573332e-05, "loss": 3.545, "step": 230500 }, { "epoch": 2.31, "learning_rate": 1.3749124036440083e-05, "loss": 3.5379, "step": 231000 }, { "epoch": 2.32, "learning_rate": 1.3649013915306838e-05, "loss": 3.5405, "step": 231500 }, { "epoch": 2.32, "learning_rate": 1.3548903794173592e-05, "loss": 3.5417, "step": 232000 }, { "epoch": 2.33, "learning_rate": 1.3448793673040345e-05, "loss": 3.5416, "step": 232500 }, { "epoch": 2.33, "learning_rate": 1.3348683551907097e-05, "loss": 3.543, "step": 233000 }, { "epoch": 2.34, "learning_rate": 1.3248573430773852e-05, "loss": 3.5473, "step": 233500 }, { "epoch": 2.34, "learning_rate": 1.3148463309640605e-05, "loss": 3.5424, "step": 234000 }, { "epoch": 2.35, "learning_rate": 1.3048353188507359e-05, "loss": 3.5367, "step": 234500 }, { "epoch": 2.35, "learning_rate": 1.2948243067374112e-05, "loss": 3.5443, "step": 235000 }, { "epoch": 2.36, "learning_rate": 1.2848132946240865e-05, "loss": 3.5426, "step": 235500 }, { "epoch": 2.36, "learning_rate": 1.2748022825107619e-05, "loss": 3.5478, "step": 236000 }, { "epoch": 2.37, "learning_rate": 1.2647912703974372e-05, "loss": 3.5481, "step": 236500 }, { "epoch": 2.37, "learning_rate": 1.2547802582841126e-05, "loss": 3.5358, "step": 237000 }, { "epoch": 2.38, "learning_rate": 1.2447692461707879e-05, "loss": 3.5492, "step": 237500 }, { "epoch": 2.38, "learning_rate": 1.2347582340574632e-05, "loss": 3.5428, "step": 238000 }, { "epoch": 2.39, "learning_rate": 1.2247472219441386e-05, "loss": 3.5412, "step": 238500 }, { "epoch": 2.39, "learning_rate": 1.2147362098308139e-05, "loss": 3.5417, "step": 239000 }, { "epoch": 2.4, "learning_rate": 1.2047251977174892e-05, "loss": 3.5401, "step": 239500 }, { "epoch": 2.4, "learning_rate": 1.1947141856041647e-05, "loss": 3.5451, "step": 240000 }, { "epoch": 2.41, "learning_rate": 1.18470317349084e-05, "loss": 3.5367, "step": 240500 }, { "epoch": 2.41, "learning_rate": 1.1746921613775153e-05, "loss": 3.5391, "step": 241000 }, { "epoch": 2.42, "learning_rate": 1.1646811492641906e-05, "loss": 3.5472, "step": 241500 }, { "epoch": 2.42, "learning_rate": 1.1546701371508661e-05, "loss": 3.5384, "step": 242000 }, { "epoch": 2.43, "learning_rate": 1.1446591250375413e-05, "loss": 3.5407, "step": 242500 }, { "epoch": 2.43, "learning_rate": 1.1346481129242166e-05, "loss": 3.5353, "step": 243000 }, { "epoch": 2.44, "learning_rate": 1.1246371008108921e-05, "loss": 3.5362, "step": 243500 }, { "epoch": 2.44, "learning_rate": 1.1146260886975673e-05, "loss": 3.5466, "step": 244000 }, { "epoch": 2.45, "learning_rate": 1.1046150765842426e-05, "loss": 3.5367, "step": 244500 }, { "epoch": 2.45, "learning_rate": 1.0946040644709181e-05, "loss": 3.5408, "step": 245000 }, { "epoch": 2.46, "learning_rate": 1.0845930523575935e-05, "loss": 3.5473, "step": 245500 }, { "epoch": 2.46, "learning_rate": 1.0745820402442686e-05, "loss": 3.548, "step": 246000 }, { "epoch": 2.47, "learning_rate": 1.064571028130944e-05, "loss": 3.5384, "step": 246500 }, { "epoch": 2.47, "learning_rate": 1.0545600160176195e-05, "loss": 3.5336, "step": 247000 }, { "epoch": 2.48, "learning_rate": 1.0445490039042948e-05, "loss": 3.5411, "step": 247500 }, { "epoch": 2.48, "learning_rate": 1.03453799179097e-05, "loss": 3.5506, "step": 248000 }, { "epoch": 2.49, "learning_rate": 1.0245269796776455e-05, "loss": 3.548, "step": 248500 }, { "epoch": 2.49, "learning_rate": 1.0145159675643208e-05, "loss": 3.5342, "step": 249000 }, { "epoch": 2.5, "learning_rate": 1.0045049554509962e-05, "loss": 3.5415, "step": 249500 }, { "epoch": 2.5, "learning_rate": 9.944939433376713e-06, "loss": 3.5354, "step": 250000 }, { "epoch": 2.51, "learning_rate": 9.844829312243468e-06, "loss": 3.5313, "step": 250500 }, { "epoch": 2.51, "learning_rate": 9.744719191110222e-06, "loss": 3.5369, "step": 251000 }, { "epoch": 2.52, "learning_rate": 9.644609069976975e-06, "loss": 3.5452, "step": 251500 }, { "epoch": 2.52, "learning_rate": 9.544498948843729e-06, "loss": 3.5447, "step": 252000 }, { "epoch": 2.53, "learning_rate": 9.444388827710482e-06, "loss": 3.5509, "step": 252500 }, { "epoch": 2.53, "learning_rate": 9.344278706577235e-06, "loss": 3.538, "step": 253000 }, { "epoch": 2.54, "learning_rate": 9.244168585443989e-06, "loss": 3.5405, "step": 253500 }, { "epoch": 2.54, "learning_rate": 9.144058464310742e-06, "loss": 3.5293, "step": 254000 }, { "epoch": 2.55, "learning_rate": 9.043948343177495e-06, "loss": 3.5478, "step": 254500 }, { "epoch": 2.55, "learning_rate": 8.943838222044249e-06, "loss": 3.53, "step": 255000 }, { "epoch": 2.56, "learning_rate": 8.843728100911002e-06, "loss": 3.54, "step": 255500 }, { "epoch": 2.56, "learning_rate": 8.743617979777756e-06, "loss": 3.5342, "step": 256000 }, { "epoch": 2.57, "learning_rate": 8.643507858644509e-06, "loss": 3.5373, "step": 256500 }, { "epoch": 2.57, "learning_rate": 8.543397737511264e-06, "loss": 3.5341, "step": 257000 }, { "epoch": 2.58, "learning_rate": 8.443287616378016e-06, "loss": 3.5384, "step": 257500 }, { "epoch": 2.58, "learning_rate": 8.343177495244769e-06, "loss": 3.5415, "step": 258000 }, { "epoch": 2.59, "learning_rate": 8.243067374111522e-06, "loss": 3.5315, "step": 258500 }, { "epoch": 2.59, "learning_rate": 8.142957252978277e-06, "loss": 3.5401, "step": 259000 }, { "epoch": 2.6, "learning_rate": 8.042847131845029e-06, "loss": 3.541, "step": 259500 }, { "epoch": 2.6, "learning_rate": 7.942737010711783e-06, "loss": 3.5443, "step": 260000 }, { "epoch": 2.61, "learning_rate": 7.842626889578538e-06, "loss": 3.5413, "step": 260500 }, { "epoch": 2.61, "learning_rate": 7.742516768445291e-06, "loss": 3.5302, "step": 261000 }, { "epoch": 2.62, "learning_rate": 7.642406647312043e-06, "loss": 3.5429, "step": 261500 }, { "epoch": 2.62, "learning_rate": 7.542296526178797e-06, "loss": 3.5417, "step": 262000 }, { "epoch": 2.63, "learning_rate": 7.442186405045551e-06, "loss": 3.5402, "step": 262500 }, { "epoch": 2.63, "learning_rate": 7.342076283912304e-06, "loss": 3.5407, "step": 263000 }, { "epoch": 2.64, "learning_rate": 7.241966162779057e-06, "loss": 3.5383, "step": 263500 }, { "epoch": 2.64, "learning_rate": 7.14185604164581e-06, "loss": 3.5362, "step": 264000 }, { "epoch": 2.65, "learning_rate": 7.041745920512564e-06, "loss": 3.5428, "step": 264500 }, { "epoch": 2.65, "learning_rate": 6.941635799379317e-06, "loss": 3.5374, "step": 265000 }, { "epoch": 2.66, "learning_rate": 6.8415256782460705e-06, "loss": 3.5414, "step": 265500 }, { "epoch": 2.66, "learning_rate": 6.741415557112825e-06, "loss": 3.5317, "step": 266000 }, { "epoch": 2.67, "learning_rate": 6.641305435979577e-06, "loss": 3.54, "step": 266500 }, { "epoch": 2.67, "learning_rate": 6.5411953148463315e-06, "loss": 3.543, "step": 267000 }, { "epoch": 2.68, "learning_rate": 6.441085193713084e-06, "loss": 3.5407, "step": 267500 }, { "epoch": 2.68, "learning_rate": 6.340975072579838e-06, "loss": 3.5433, "step": 268000 }, { "epoch": 2.69, "learning_rate": 6.240864951446592e-06, "loss": 3.5359, "step": 268500 }, { "epoch": 2.69, "learning_rate": 6.140754830313345e-06, "loss": 3.5419, "step": 269000 }, { "epoch": 2.7, "learning_rate": 6.040644709180098e-06, "loss": 3.5488, "step": 269500 }, { "epoch": 2.7, "learning_rate": 5.940534588046852e-06, "loss": 3.5352, "step": 270000 }, { "epoch": 2.71, "learning_rate": 5.840424466913605e-06, "loss": 3.5328, "step": 270500 }, { "epoch": 2.71, "learning_rate": 5.7403143457803585e-06, "loss": 3.5397, "step": 271000 }, { "epoch": 2.72, "learning_rate": 5.640204224647112e-06, "loss": 3.529, "step": 271500 }, { "epoch": 2.72, "learning_rate": 5.540094103513866e-06, "loss": 3.5405, "step": 272000 }, { "epoch": 2.73, "learning_rate": 5.439983982380619e-06, "loss": 3.5309, "step": 272500 }, { "epoch": 2.73, "learning_rate": 5.339873861247372e-06, "loss": 3.5336, "step": 273000 }, { "epoch": 2.74, "learning_rate": 5.239763740114125e-06, "loss": 3.5471, "step": 273500 }, { "epoch": 2.74, "learning_rate": 5.139653618980879e-06, "loss": 3.5301, "step": 274000 }, { "epoch": 2.75, "learning_rate": 5.039543497847633e-06, "loss": 3.537, "step": 274500 }, { "epoch": 2.75, "learning_rate": 4.9394333767143855e-06, "loss": 3.5421, "step": 275000 }, { "epoch": 2.76, "learning_rate": 4.83932325558114e-06, "loss": 3.5365, "step": 275500 }, { "epoch": 2.76, "learning_rate": 4.739213134447892e-06, "loss": 3.5313, "step": 276000 }, { "epoch": 2.77, "learning_rate": 4.6391030133146465e-06, "loss": 3.5356, "step": 276500 }, { "epoch": 2.77, "learning_rate": 4.5389928921814e-06, "loss": 3.5387, "step": 277000 }, { "epoch": 2.78, "learning_rate": 4.438882771048153e-06, "loss": 3.5386, "step": 277500 }, { "epoch": 2.78, "learning_rate": 4.338772649914907e-06, "loss": 3.5426, "step": 278000 }, { "epoch": 2.79, "learning_rate": 4.23866252878166e-06, "loss": 3.5425, "step": 278500 }, { "epoch": 2.79, "learning_rate": 4.138552407648413e-06, "loss": 3.5396, "step": 279000 }, { "epoch": 2.8, "learning_rate": 4.038442286515167e-06, "loss": 3.5389, "step": 279500 }, { "epoch": 2.8, "learning_rate": 3.93833216538192e-06, "loss": 3.5308, "step": 280000 }, { "epoch": 2.81, "learning_rate": 3.838222044248674e-06, "loss": 3.5359, "step": 280500 }, { "epoch": 2.81, "learning_rate": 3.738111923115427e-06, "loss": 3.5365, "step": 281000 }, { "epoch": 2.82, "learning_rate": 3.6380018019821807e-06, "loss": 3.5287, "step": 281500 }, { "epoch": 2.82, "learning_rate": 3.537891680848934e-06, "loss": 3.5452, "step": 282000 }, { "epoch": 2.83, "learning_rate": 3.4377815597156874e-06, "loss": 3.545, "step": 282500 }, { "epoch": 2.83, "learning_rate": 3.337671438582441e-06, "loss": 3.5322, "step": 283000 }, { "epoch": 2.84, "learning_rate": 3.237561317449194e-06, "loss": 3.5277, "step": 283500 }, { "epoch": 2.84, "learning_rate": 3.1374511963159475e-06, "loss": 3.5443, "step": 284000 }, { "epoch": 2.85, "learning_rate": 3.0373410751827013e-06, "loss": 3.5467, "step": 284500 }, { "epoch": 2.85, "learning_rate": 2.9372309540494547e-06, "loss": 3.5383, "step": 285000 }, { "epoch": 2.86, "learning_rate": 2.837120832916208e-06, "loss": 3.5327, "step": 285500 }, { "epoch": 2.86, "learning_rate": 2.7370107117829615e-06, "loss": 3.534, "step": 286000 }, { "epoch": 2.87, "learning_rate": 2.6369005906497144e-06, "loss": 3.5382, "step": 286500 }, { "epoch": 2.87, "learning_rate": 2.536790469516468e-06, "loss": 3.5347, "step": 287000 }, { "epoch": 2.88, "learning_rate": 2.4366803483832216e-06, "loss": 3.5277, "step": 287500 }, { "epoch": 2.88, "learning_rate": 2.336570227249975e-06, "loss": 3.5319, "step": 288000 }, { "epoch": 2.89, "learning_rate": 2.2364601061167284e-06, "loss": 3.5436, "step": 288500 }, { "epoch": 2.89, "learning_rate": 2.1363499849834817e-06, "loss": 3.538, "step": 289000 }, { "epoch": 2.9, "learning_rate": 2.036239863850235e-06, "loss": 3.5365, "step": 289500 }, { "epoch": 2.9, "learning_rate": 1.936129742716989e-06, "loss": 3.5385, "step": 290000 }, { "epoch": 2.91, "learning_rate": 1.836019621583742e-06, "loss": 3.5317, "step": 290500 }, { "epoch": 2.91, "learning_rate": 1.7359095004504957e-06, "loss": 3.536, "step": 291000 }, { "epoch": 2.92, "learning_rate": 1.635799379317249e-06, "loss": 3.5383, "step": 291500 }, { "epoch": 2.92, "learning_rate": 1.5356892581840024e-06, "loss": 3.5316, "step": 292000 }, { "epoch": 2.93, "learning_rate": 1.435579137050756e-06, "loss": 3.5352, "step": 292500 }, { "epoch": 2.93, "learning_rate": 1.3354690159175094e-06, "loss": 3.5296, "step": 293000 }, { "epoch": 2.94, "learning_rate": 1.2353588947842625e-06, "loss": 3.5341, "step": 293500 }, { "epoch": 2.94, "learning_rate": 1.1352487736510161e-06, "loss": 3.5336, "step": 294000 }, { "epoch": 2.95, "learning_rate": 1.0351386525177695e-06, "loss": 3.5404, "step": 294500 }, { "epoch": 2.95, "learning_rate": 9.35028531384523e-07, "loss": 3.5433, "step": 295000 }, { "epoch": 2.96, "learning_rate": 8.349184102512765e-07, "loss": 3.5247, "step": 295500 }, { "epoch": 2.96, "learning_rate": 7.348082891180298e-07, "loss": 3.5389, "step": 296000 }, { "epoch": 2.97, "learning_rate": 6.346981679847833e-07, "loss": 3.539, "step": 296500 }, { "epoch": 2.97, "learning_rate": 5.345880468515367e-07, "loss": 3.5283, "step": 297000 }, { "epoch": 2.98, "learning_rate": 4.3447792571829013e-07, "loss": 3.5397, "step": 297500 }, { "epoch": 2.98, "learning_rate": 3.3436780458504356e-07, "loss": 3.5438, "step": 298000 }, { "epoch": 2.99, "learning_rate": 2.34257683451797e-07, "loss": 3.5265, "step": 298500 }, { "epoch": 2.99, "learning_rate": 1.341475623185504e-07, "loss": 3.5346, "step": 299000 }, { "epoch": 3.0, "learning_rate": 3.403744118530384e-08, "loss": 3.5366, "step": 299500 }, { "epoch": 3.0, "step": 299670, "total_flos": 4.00615718456918e+19, "train_loss": 1.773998625050055, "train_runtime": 174345.3711, "train_samples_per_second": 55.003, "train_steps_per_second": 1.719 } ], "max_steps": 299670, "num_train_epochs": 3, "total_flos": 4.00615718456918e+19, "trial_name": null, "trial_params": null }