|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9999949945189983, |
|
"global_step": 299670, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.989988987886675e-05, |
|
"loss": 3.7278, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.979977975773351e-05, |
|
"loss": 3.7216, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 5.9699669636600265e-05, |
|
"loss": 3.715, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 5.9599559515467015e-05, |
|
"loss": 3.7158, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.949944939433377e-05, |
|
"loss": 3.7145, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.939933927320052e-05, |
|
"loss": 3.7008, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 5.929922915206727e-05, |
|
"loss": 3.6985, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 5.919911903093403e-05, |
|
"loss": 3.6897, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 5.9099008909800785e-05, |
|
"loss": 3.6934, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 5.8998898788667535e-05, |
|
"loss": 3.6838, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 5.889878866753429e-05, |
|
"loss": 3.6917, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 5.879867854640104e-05, |
|
"loss": 3.6839, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 5.869856842526779e-05, |
|
"loss": 3.6839, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 5.8598458304134556e-05, |
|
"loss": 3.6904, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 5.8498348183001306e-05, |
|
"loss": 3.687, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 5.8398238061868056e-05, |
|
"loss": 3.6835, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 5.8298127940734806e-05, |
|
"loss": 3.6846, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 5.819801781960156e-05, |
|
"loss": 3.6819, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 5.809790769846831e-05, |
|
"loss": 3.6802, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 5.799779757733507e-05, |
|
"loss": 3.6818, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 5.7897687456201826e-05, |
|
"loss": 3.6936, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 5.7797577335068576e-05, |
|
"loss": 3.6757, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 5.7697467213935326e-05, |
|
"loss": 3.6784, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 5.759735709280208e-05, |
|
"loss": 3.6812, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 5.749724697166884e-05, |
|
"loss": 3.6756, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 5.739713685053559e-05, |
|
"loss": 3.6803, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 5.7297026729402346e-05, |
|
"loss": 3.6851, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 5.7196916608269096e-05, |
|
"loss": 3.668, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 5.7096806487135846e-05, |
|
"loss": 3.6732, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 5.699669636600261e-05, |
|
"loss": 3.6791, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 5.689658624486936e-05, |
|
"loss": 3.6733, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 5.679647612373611e-05, |
|
"loss": 3.6656, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 5.6696366002602866e-05, |
|
"loss": 3.67, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 5.6596255881469616e-05, |
|
"loss": 3.6716, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 5.6496145760336366e-05, |
|
"loss": 3.671, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 5.639603563920313e-05, |
|
"loss": 3.6723, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 5.629592551806988e-05, |
|
"loss": 3.6755, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 5.619581539693663e-05, |
|
"loss": 3.6694, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 5.6095705275803387e-05, |
|
"loss": 3.6714, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 5.5995595154670137e-05, |
|
"loss": 3.6688, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 5.589548503353689e-05, |
|
"loss": 3.6638, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 5.579537491240365e-05, |
|
"loss": 3.6639, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 5.56952647912704e-05, |
|
"loss": 3.6567, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 5.559515467013715e-05, |
|
"loss": 3.6708, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 5.549504454900391e-05, |
|
"loss": 3.6601, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 5.539493442787066e-05, |
|
"loss": 3.6652, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 5.5294824306737414e-05, |
|
"loss": 3.6653, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 5.519471418560417e-05, |
|
"loss": 3.6492, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 5.509460406447092e-05, |
|
"loss": 3.6575, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 5.499449394333767e-05, |
|
"loss": 3.656, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 5.489438382220442e-05, |
|
"loss": 3.6545, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 5.4794273701071184e-05, |
|
"loss": 3.6486, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 5.4694163579937934e-05, |
|
"loss": 3.6575, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 5.4594053458804684e-05, |
|
"loss": 3.6579, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 5.449394333767144e-05, |
|
"loss": 3.6551, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 5.439383321653819e-05, |
|
"loss": 3.6531, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 5.429372309540494e-05, |
|
"loss": 3.654, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 5.4193612974271704e-05, |
|
"loss": 3.6438, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 5.4093502853138454e-05, |
|
"loss": 3.6519, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 5.3993392732005204e-05, |
|
"loss": 3.6542, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 5.389328261087196e-05, |
|
"loss": 3.6478, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 5.379317248973871e-05, |
|
"loss": 3.6613, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 5.369306236860547e-05, |
|
"loss": 3.6529, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 5.3592952247472224e-05, |
|
"loss": 3.6515, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 5.3492842126338974e-05, |
|
"loss": 3.6509, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 5.3392732005205724e-05, |
|
"loss": 3.66, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 5.329262188407248e-05, |
|
"loss": 3.6488, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 5.319251176293924e-05, |
|
"loss": 3.6559, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 5.309240164180599e-05, |
|
"loss": 3.6479, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 5.2992291520672745e-05, |
|
"loss": 3.6433, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 5.2892181399539495e-05, |
|
"loss": 3.6491, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 5.2792071278406245e-05, |
|
"loss": 3.655, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 5.2691961157273e-05, |
|
"loss": 3.6439, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 5.259185103613976e-05, |
|
"loss": 3.6434, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 5.249174091500651e-05, |
|
"loss": 3.6409, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 5.2391630793873265e-05, |
|
"loss": 3.6415, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 5.2291520672740015e-05, |
|
"loss": 3.6454, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 5.2191410551606765e-05, |
|
"loss": 3.646, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 5.209130043047353e-05, |
|
"loss": 3.6498, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 5.199119030934028e-05, |
|
"loss": 3.6407, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 5.189108018820703e-05, |
|
"loss": 3.6425, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 5.1790970067073785e-05, |
|
"loss": 3.6435, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5.1690859945940535e-05, |
|
"loss": 3.6485, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5.1590749824807285e-05, |
|
"loss": 3.6401, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 5.149063970367404e-05, |
|
"loss": 3.6369, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 5.13905295825408e-05, |
|
"loss": 3.634, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 5.129041946140755e-05, |
|
"loss": 3.6396, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 5.11903093402743e-05, |
|
"loss": 3.6362, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5.1090199219141055e-05, |
|
"loss": 3.6328, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5.099008909800781e-05, |
|
"loss": 3.6389, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5.088997897687456e-05, |
|
"loss": 3.6455, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5.078986885574132e-05, |
|
"loss": 3.6348, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5.068975873460807e-05, |
|
"loss": 3.6386, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5.058964861347482e-05, |
|
"loss": 3.6311, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5.0489538492341576e-05, |
|
"loss": 3.6348, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5.038942837120833e-05, |
|
"loss": 3.6394, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.028931825007508e-05, |
|
"loss": 3.6309, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.018920812894184e-05, |
|
"loss": 3.6308, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5.008909800780859e-05, |
|
"loss": 3.6285, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.998898788667534e-05, |
|
"loss": 3.6377, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.98888777655421e-05, |
|
"loss": 3.6318, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.978876764440885e-05, |
|
"loss": 3.6303, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.96886575232756e-05, |
|
"loss": 3.6279, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.958854740214236e-05, |
|
"loss": 3.6245, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.948843728100911e-05, |
|
"loss": 3.6232, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.9388327159875866e-05, |
|
"loss": 3.6305, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.928821703874262e-05, |
|
"loss": 3.6197, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.918810691760937e-05, |
|
"loss": 3.6225, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.908799679647612e-05, |
|
"loss": 3.6248, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.898788667534288e-05, |
|
"loss": 3.6244, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.888777655420963e-05, |
|
"loss": 3.6323, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.8787666433076386e-05, |
|
"loss": 3.6215, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.868755631194314e-05, |
|
"loss": 3.6286, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.858744619080989e-05, |
|
"loss": 3.6305, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.848733606967664e-05, |
|
"loss": 3.6207, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.838722594854339e-05, |
|
"loss": 3.6255, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.828711582741016e-05, |
|
"loss": 3.6254, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.818700570627691e-05, |
|
"loss": 3.6274, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.808689558514366e-05, |
|
"loss": 3.6276, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.7986785464010413e-05, |
|
"loss": 3.6242, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.7886675342877163e-05, |
|
"loss": 3.6208, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.7786565221743913e-05, |
|
"loss": 3.6184, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.768645510061068e-05, |
|
"loss": 3.6234, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.758634497947743e-05, |
|
"loss": 3.6217, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.748623485834418e-05, |
|
"loss": 3.6199, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.7386124737210934e-05, |
|
"loss": 3.6157, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.7286014616077684e-05, |
|
"loss": 3.6195, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.718590449494444e-05, |
|
"loss": 3.6082, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.70857943738112e-05, |
|
"loss": 3.6117, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.698568425267795e-05, |
|
"loss": 3.6205, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.68855741315447e-05, |
|
"loss": 3.6128, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.6785464010411454e-05, |
|
"loss": 3.6186, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.6685353889278204e-05, |
|
"loss": 3.6168, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.658524376814496e-05, |
|
"loss": 3.6224, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.648513364701172e-05, |
|
"loss": 3.6112, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.638502352587847e-05, |
|
"loss": 3.6144, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.628491340474522e-05, |
|
"loss": 3.6178, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.6184803283611974e-05, |
|
"loss": 3.606, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.608469316247873e-05, |
|
"loss": 3.6104, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.598458304134548e-05, |
|
"loss": 3.6055, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.588447292021224e-05, |
|
"loss": 3.6067, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.578436279907899e-05, |
|
"loss": 3.6141, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.568425267794574e-05, |
|
"loss": 3.6147, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.55841425568125e-05, |
|
"loss": 3.6193, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.548403243567925e-05, |
|
"loss": 3.6088, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.5383922314546e-05, |
|
"loss": 3.6106, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.528381219341276e-05, |
|
"loss": 3.6058, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.518370207227951e-05, |
|
"loss": 3.615, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.508359195114626e-05, |
|
"loss": 3.6123, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.4983481830013015e-05, |
|
"loss": 3.6059, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.488337170887977e-05, |
|
"loss": 3.602, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.478326158774652e-05, |
|
"loss": 3.605, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.468315146661327e-05, |
|
"loss": 3.6099, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.458304134548003e-05, |
|
"loss": 3.602, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.4482931224346785e-05, |
|
"loss": 3.6058, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.4382821103213535e-05, |
|
"loss": 3.6044, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.428271098208029e-05, |
|
"loss": 3.6072, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.418260086094704e-05, |
|
"loss": 3.6015, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.408249073981379e-05, |
|
"loss": 3.6019, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.398238061868055e-05, |
|
"loss": 3.6099, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.3882270497547305e-05, |
|
"loss": 3.6005, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.3782160376414055e-05, |
|
"loss": 3.609, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.368205025528081e-05, |
|
"loss": 3.601, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.358194013414756e-05, |
|
"loss": 3.6013, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.348183001301431e-05, |
|
"loss": 3.6023, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.3381719891881076e-05, |
|
"loss": 3.6047, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.3281609770747826e-05, |
|
"loss": 3.5886, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.3181499649614575e-05, |
|
"loss": 3.5942, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.308138952848133e-05, |
|
"loss": 3.5961, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.298127940734808e-05, |
|
"loss": 3.6019, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.288116928621483e-05, |
|
"loss": 3.6043, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.2781059165081596e-05, |
|
"loss": 3.5922, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.2680949043948346e-05, |
|
"loss": 3.5973, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.2580838922815096e-05, |
|
"loss": 3.603, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.248072880168185e-05, |
|
"loss": 3.6052, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.23806186805486e-05, |
|
"loss": 3.5951, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 4.228050855941536e-05, |
|
"loss": 3.5904, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 4.2180398438282116e-05, |
|
"loss": 3.5861, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.2080288317148866e-05, |
|
"loss": 3.6006, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.1980178196015616e-05, |
|
"loss": 3.5911, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.188006807488237e-05, |
|
"loss": 3.5982, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.177995795374913e-05, |
|
"loss": 3.5939, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.167984783261588e-05, |
|
"loss": 3.6002, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.157973771148263e-05, |
|
"loss": 3.5975, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.1479627590349386e-05, |
|
"loss": 3.5909, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.1379517469216136e-05, |
|
"loss": 3.5986, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.1279407348082886e-05, |
|
"loss": 3.589, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.117929722694965e-05, |
|
"loss": 3.5922, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.10791871058164e-05, |
|
"loss": 3.5921, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.097907698468315e-05, |
|
"loss": 3.5971, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.0878966863549907e-05, |
|
"loss": 3.5914, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.0778856742416657e-05, |
|
"loss": 3.5933, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 4.067874662128341e-05, |
|
"loss": 3.5823, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 4.057863650015017e-05, |
|
"loss": 3.5955, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.047852637901692e-05, |
|
"loss": 3.587, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.037841625788367e-05, |
|
"loss": 3.5958, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.027830613675043e-05, |
|
"loss": 3.5866, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.017819601561718e-05, |
|
"loss": 3.5848, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.0078085894483934e-05, |
|
"loss": 3.5803, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.997797577335069e-05, |
|
"loss": 3.5811, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.987786565221744e-05, |
|
"loss": 3.5407, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.977775553108419e-05, |
|
"loss": 3.5453, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.967764540995095e-05, |
|
"loss": 3.5439, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.9577535288817704e-05, |
|
"loss": 3.5486, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.9477425167684454e-05, |
|
"loss": 3.5476, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.937731504655121e-05, |
|
"loss": 3.548, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.927720492541796e-05, |
|
"loss": 3.5348, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.917709480428471e-05, |
|
"loss": 3.5374, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.907698468315147e-05, |
|
"loss": 3.5443, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.8976874562018224e-05, |
|
"loss": 3.547, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.8876764440884974e-05, |
|
"loss": 3.5464, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.877665431975173e-05, |
|
"loss": 3.5456, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.867654419861848e-05, |
|
"loss": 3.5402, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.857643407748523e-05, |
|
"loss": 3.5425, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.847632395635199e-05, |
|
"loss": 3.5461, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.8376213835218744e-05, |
|
"loss": 3.547, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.8276103714085494e-05, |
|
"loss": 3.547, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.8175993592952244e-05, |
|
"loss": 3.5478, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.8075883471819e-05, |
|
"loss": 3.5439, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.797577335068576e-05, |
|
"loss": 3.5428, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.787566322955251e-05, |
|
"loss": 3.5457, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.7775553108419265e-05, |
|
"loss": 3.5394, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.7675442987286015e-05, |
|
"loss": 3.5455, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.7575332866152765e-05, |
|
"loss": 3.5464, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.747522274501952e-05, |
|
"loss": 3.5397, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.737511262388628e-05, |
|
"loss": 3.5395, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.727500250275303e-05, |
|
"loss": 3.5362, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.7174892381619785e-05, |
|
"loss": 3.5442, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.7074782260486535e-05, |
|
"loss": 3.5365, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.6974672139353285e-05, |
|
"loss": 3.5408, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.687456201822005e-05, |
|
"loss": 3.5402, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.67744518970868e-05, |
|
"loss": 3.5353, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.667434177595355e-05, |
|
"loss": 3.5403, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.6574231654820305e-05, |
|
"loss": 3.5403, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.6474121533687055e-05, |
|
"loss": 3.5445, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.6374011412553805e-05, |
|
"loss": 3.5895, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.627390129142057e-05, |
|
"loss": 3.5945, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.617379117028732e-05, |
|
"loss": 3.5952, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.607368104915407e-05, |
|
"loss": 3.5922, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.5973570928020825e-05, |
|
"loss": 3.5886, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.5873460806887575e-05, |
|
"loss": 3.5939, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.577335068575433e-05, |
|
"loss": 3.5882, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.567324056462109e-05, |
|
"loss": 3.5827, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.557313044348784e-05, |
|
"loss": 3.5883, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.547302032235459e-05, |
|
"loss": 3.5938, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.5372910201221346e-05, |
|
"loss": 3.5888, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.5272800080088096e-05, |
|
"loss": 3.5887, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.517268995895485e-05, |
|
"loss": 3.5872, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.50725798378216e-05, |
|
"loss": 3.5805, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.497246971668836e-05, |
|
"loss": 3.5884, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.487235959555511e-05, |
|
"loss": 3.59, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.477224947442186e-05, |
|
"loss": 3.5949, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.467213935328862e-05, |
|
"loss": 3.5853, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.457202923215537e-05, |
|
"loss": 3.5865, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.447191911102212e-05, |
|
"loss": 3.5931, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.437180898988888e-05, |
|
"loss": 3.5939, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.427169886875563e-05, |
|
"loss": 3.5869, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.4171588747622386e-05, |
|
"loss": 3.5897, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.407147862648914e-05, |
|
"loss": 3.586, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.397136850535589e-05, |
|
"loss": 3.5922, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.387125838422264e-05, |
|
"loss": 3.5907, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.37711482630894e-05, |
|
"loss": 3.5943, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.367103814195615e-05, |
|
"loss": 3.584, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.3570928020822906e-05, |
|
"loss": 3.5856, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.347081789968966e-05, |
|
"loss": 3.5853, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.337070777855641e-05, |
|
"loss": 3.5876, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.327059765742316e-05, |
|
"loss": 3.5933, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.317048753628992e-05, |
|
"loss": 3.583, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.307037741515668e-05, |
|
"loss": 3.5854, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.297026729402343e-05, |
|
"loss": 3.5859, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.2870157172890183e-05, |
|
"loss": 3.5905, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.277004705175693e-05, |
|
"loss": 3.5863, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 3.266993693062368e-05, |
|
"loss": 3.5836, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 3.256982680949044e-05, |
|
"loss": 3.5912, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 3.24697166883572e-05, |
|
"loss": 3.5823, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 3.236960656722395e-05, |
|
"loss": 3.581, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 3.2269496446090704e-05, |
|
"loss": 3.5875, |
|
"step": 138500 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 3.2169386324957454e-05, |
|
"loss": 3.5897, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 3.2069276203824204e-05, |
|
"loss": 3.5845, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 3.196916608269096e-05, |
|
"loss": 3.5937, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 3.186905596155772e-05, |
|
"loss": 3.5862, |
|
"step": 140500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 3.176894584042447e-05, |
|
"loss": 3.5894, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 3.166883571929122e-05, |
|
"loss": 3.5844, |
|
"step": 141500 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 3.1568725598157974e-05, |
|
"loss": 3.5877, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 3.1468615477024724e-05, |
|
"loss": 3.5814, |
|
"step": 142500 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 3.136850535589148e-05, |
|
"loss": 3.5898, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 3.126839523475824e-05, |
|
"loss": 3.5867, |
|
"step": 143500 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 3.116828511362499e-05, |
|
"loss": 3.5788, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 3.106817499249174e-05, |
|
"loss": 3.5853, |
|
"step": 144500 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 3.0968064871358494e-05, |
|
"loss": 3.5797, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 3.086795475022525e-05, |
|
"loss": 3.5808, |
|
"step": 145500 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 3.0767844629092e-05, |
|
"loss": 3.5905, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 3.066773450795876e-05, |
|
"loss": 3.5829, |
|
"step": 146500 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 3.056762438682551e-05, |
|
"loss": 3.5814, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 3.046751426569226e-05, |
|
"loss": 3.5844, |
|
"step": 147500 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 3.0367404144559018e-05, |
|
"loss": 3.5842, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 3.026729402342577e-05, |
|
"loss": 3.5841, |
|
"step": 148500 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 3.0167183902292525e-05, |
|
"loss": 3.5872, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 3.0067073781159275e-05, |
|
"loss": 3.5906, |
|
"step": 149500 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.996696366002603e-05, |
|
"loss": 3.5784, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.9866853538892785e-05, |
|
"loss": 3.5907, |
|
"step": 150500 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.9766743417759535e-05, |
|
"loss": 3.5839, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.966663329662629e-05, |
|
"loss": 3.5824, |
|
"step": 151500 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.9566523175493045e-05, |
|
"loss": 3.5771, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.9466413054359795e-05, |
|
"loss": 3.5827, |
|
"step": 152500 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.936630293322655e-05, |
|
"loss": 3.5817, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.92661928120933e-05, |
|
"loss": 3.5869, |
|
"step": 153500 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.9166082690960055e-05, |
|
"loss": 3.5867, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.906597256982681e-05, |
|
"loss": 3.5923, |
|
"step": 154500 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.896586244869356e-05, |
|
"loss": 3.5823, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.886575232756032e-05, |
|
"loss": 3.5769, |
|
"step": 155500 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.8765642206427072e-05, |
|
"loss": 3.579, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.8665532085293822e-05, |
|
"loss": 3.5777, |
|
"step": 156500 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.856542196416058e-05, |
|
"loss": 3.5832, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.8465311843027332e-05, |
|
"loss": 3.5799, |
|
"step": 157500 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.8365201721894082e-05, |
|
"loss": 3.5748, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.826509160076084e-05, |
|
"loss": 3.5888, |
|
"step": 158500 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.8164981479627592e-05, |
|
"loss": 3.5878, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.8064871358494342e-05, |
|
"loss": 3.5815, |
|
"step": 159500 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.79647612373611e-05, |
|
"loss": 3.5785, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.7864651116227852e-05, |
|
"loss": 3.5865, |
|
"step": 160500 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.7764540995094606e-05, |
|
"loss": 3.5718, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.766443087396136e-05, |
|
"loss": 3.5854, |
|
"step": 161500 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.756432075282811e-05, |
|
"loss": 3.5721, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.7464210631694866e-05, |
|
"loss": 3.5775, |
|
"step": 162500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.736410051056162e-05, |
|
"loss": 3.5874, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.726399038942837e-05, |
|
"loss": 3.5854, |
|
"step": 163500 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.7163880268295126e-05, |
|
"loss": 3.582, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.706377014716188e-05, |
|
"loss": 3.5753, |
|
"step": 164500 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.6963660026028633e-05, |
|
"loss": 3.5792, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.6863549904895386e-05, |
|
"loss": 3.5759, |
|
"step": 165500 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.676343978376214e-05, |
|
"loss": 3.5819, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.6663329662628893e-05, |
|
"loss": 3.5744, |
|
"step": 166500 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.6563219541495646e-05, |
|
"loss": 3.5814, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.64631094203624e-05, |
|
"loss": 3.579, |
|
"step": 167500 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.6362999299229153e-05, |
|
"loss": 3.5872, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.6262889178095906e-05, |
|
"loss": 3.5764, |
|
"step": 168500 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.616277905696266e-05, |
|
"loss": 3.576, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.6062668935829413e-05, |
|
"loss": 3.5744, |
|
"step": 169500 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.5962558814696166e-05, |
|
"loss": 3.5742, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.586244869356292e-05, |
|
"loss": 3.5718, |
|
"step": 170500 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.5762338572429673e-05, |
|
"loss": 3.5761, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.5662228451296426e-05, |
|
"loss": 3.5765, |
|
"step": 171500 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.556211833016318e-05, |
|
"loss": 3.5783, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.5462008209029933e-05, |
|
"loss": 3.5767, |
|
"step": 172500 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.5361898087896687e-05, |
|
"loss": 3.5697, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.526178796676344e-05, |
|
"loss": 3.57, |
|
"step": 173500 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.5161677845630193e-05, |
|
"loss": 3.573, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.506156772449695e-05, |
|
"loss": 3.5741, |
|
"step": 174500 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.49614576033637e-05, |
|
"loss": 3.5802, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.4861347482230453e-05, |
|
"loss": 3.5763, |
|
"step": 175500 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.476123736109721e-05, |
|
"loss": 3.5697, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.466112723996396e-05, |
|
"loss": 3.5797, |
|
"step": 176500 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.4561017118830714e-05, |
|
"loss": 3.5779, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.4460906997697467e-05, |
|
"loss": 3.5754, |
|
"step": 177500 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.436079687656422e-05, |
|
"loss": 3.5808, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.4260686755430974e-05, |
|
"loss": 3.57, |
|
"step": 178500 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.4160576634297727e-05, |
|
"loss": 3.5687, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.406046651316448e-05, |
|
"loss": 3.5777, |
|
"step": 179500 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.3960356392031237e-05, |
|
"loss": 3.5793, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.3860246270897987e-05, |
|
"loss": 3.5826, |
|
"step": 180500 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.376013614976474e-05, |
|
"loss": 3.5682, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.3660026028631497e-05, |
|
"loss": 3.5774, |
|
"step": 181500 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.3559915907498247e-05, |
|
"loss": 3.5774, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.3459805786365e-05, |
|
"loss": 3.5682, |
|
"step": 182500 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.3359695665231758e-05, |
|
"loss": 3.5741, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.3259585544098508e-05, |
|
"loss": 3.5718, |
|
"step": 183500 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.3159475422965264e-05, |
|
"loss": 3.5704, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.3059365301832018e-05, |
|
"loss": 3.5711, |
|
"step": 184500 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.2959255180698768e-05, |
|
"loss": 3.5759, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.2859145059565524e-05, |
|
"loss": 3.5717, |
|
"step": 185500 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.2759034938432274e-05, |
|
"loss": 3.5752, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.2658924817299028e-05, |
|
"loss": 3.5771, |
|
"step": 186500 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.2558814696165785e-05, |
|
"loss": 3.5646, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.2458704575032535e-05, |
|
"loss": 3.5694, |
|
"step": 187500 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.2358594453899288e-05, |
|
"loss": 3.5752, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.2258484332766045e-05, |
|
"loss": 3.5732, |
|
"step": 188500 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.2158374211632795e-05, |
|
"loss": 3.576, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.205826409049955e-05, |
|
"loss": 3.579, |
|
"step": 189500 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.1958153969366305e-05, |
|
"loss": 3.577, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.1858043848233055e-05, |
|
"loss": 3.5761, |
|
"step": 190500 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.175793372709981e-05, |
|
"loss": 3.5713, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 2.1657823605966565e-05, |
|
"loss": 3.567, |
|
"step": 191500 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 2.1557713484833315e-05, |
|
"loss": 3.5702, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 2.145760336370007e-05, |
|
"loss": 3.5675, |
|
"step": 192500 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 2.1357493242566825e-05, |
|
"loss": 3.5677, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 2.125738312143358e-05, |
|
"loss": 3.5756, |
|
"step": 193500 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 2.1157273000300332e-05, |
|
"loss": 3.5742, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 2.1057162879167082e-05, |
|
"loss": 3.5783, |
|
"step": 194500 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 2.095705275803384e-05, |
|
"loss": 3.5716, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 2.0856942636900592e-05, |
|
"loss": 3.5644, |
|
"step": 195500 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 2.0756832515767342e-05, |
|
"loss": 3.5701, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 2.06567223946341e-05, |
|
"loss": 3.5744, |
|
"step": 196500 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 2.0556612273500852e-05, |
|
"loss": 3.5715, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 2.0456502152367602e-05, |
|
"loss": 3.5673, |
|
"step": 197500 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 2.035639203123436e-05, |
|
"loss": 3.5683, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 2.0256281910101112e-05, |
|
"loss": 3.5723, |
|
"step": 198500 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 2.0156171788967866e-05, |
|
"loss": 3.5749, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 2.005606166783462e-05, |
|
"loss": 3.5611, |
|
"step": 199500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.9955951546701372e-05, |
|
"loss": 3.561, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.9855841425568126e-05, |
|
"loss": 3.5365, |
|
"step": 200500 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.975573130443488e-05, |
|
"loss": 3.5517, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.9655621183301632e-05, |
|
"loss": 3.5408, |
|
"step": 201500 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.9555511062168386e-05, |
|
"loss": 3.5401, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.945540094103514e-05, |
|
"loss": 3.5402, |
|
"step": 202500 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.9355290819901893e-05, |
|
"loss": 3.5454, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.9255180698768646e-05, |
|
"loss": 3.537, |
|
"step": 203500 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.91550705776354e-05, |
|
"loss": 3.5413, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.9054960456502153e-05, |
|
"loss": 3.543, |
|
"step": 204500 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.8954850335368906e-05, |
|
"loss": 3.5367, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.885474021423566e-05, |
|
"loss": 3.5449, |
|
"step": 205500 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.8754630093102413e-05, |
|
"loss": 3.536, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.8654519971969166e-05, |
|
"loss": 3.5429, |
|
"step": 206500 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.855440985083592e-05, |
|
"loss": 3.5373, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.8454299729702673e-05, |
|
"loss": 3.5495, |
|
"step": 207500 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.8354189608569426e-05, |
|
"loss": 3.5432, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.8254079487436183e-05, |
|
"loss": 3.5435, |
|
"step": 208500 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.8153969366302933e-05, |
|
"loss": 3.545, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.8053859245169686e-05, |
|
"loss": 3.5331, |
|
"step": 209500 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.7953749124036443e-05, |
|
"loss": 3.5315, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.7853639002903193e-05, |
|
"loss": 3.5333, |
|
"step": 210500 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.7753528881769947e-05, |
|
"loss": 3.548, |
|
"step": 211000 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.76534187606367e-05, |
|
"loss": 3.5485, |
|
"step": 211500 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.7553308639503453e-05, |
|
"loss": 3.5378, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.745319851837021e-05, |
|
"loss": 3.5438, |
|
"step": 212500 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.735308839723696e-05, |
|
"loss": 3.5346, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.7252978276103713e-05, |
|
"loss": 3.5406, |
|
"step": 213500 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.715286815497047e-05, |
|
"loss": 3.5365, |
|
"step": 214000 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.705275803383722e-05, |
|
"loss": 3.5403, |
|
"step": 214500 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.6952647912703974e-05, |
|
"loss": 3.5449, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.685253779157073e-05, |
|
"loss": 3.5456, |
|
"step": 215500 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.675242767043748e-05, |
|
"loss": 3.5391, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.6652317549304234e-05, |
|
"loss": 3.5437, |
|
"step": 216500 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.655220742817099e-05, |
|
"loss": 3.5374, |
|
"step": 217000 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.645209730703774e-05, |
|
"loss": 3.5483, |
|
"step": 217500 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.6351987185904497e-05, |
|
"loss": 3.546, |
|
"step": 218000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.6251877064771247e-05, |
|
"loss": 3.5468, |
|
"step": 218500 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.6151766943638e-05, |
|
"loss": 3.542, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.6051656822504757e-05, |
|
"loss": 3.5409, |
|
"step": 219500 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.5951546701371507e-05, |
|
"loss": 3.5452, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.585143658023826e-05, |
|
"loss": 3.5398, |
|
"step": 220500 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.5751326459105017e-05, |
|
"loss": 3.5423, |
|
"step": 221000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.5651216337971767e-05, |
|
"loss": 3.5423, |
|
"step": 221500 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.5551106216838524e-05, |
|
"loss": 3.5529, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.5450996095705278e-05, |
|
"loss": 3.5467, |
|
"step": 222500 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.5350885974572028e-05, |
|
"loss": 3.5428, |
|
"step": 223000 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.5250775853438784e-05, |
|
"loss": 3.5497, |
|
"step": 223500 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.5150665732305536e-05, |
|
"loss": 3.5413, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.505055561117229e-05, |
|
"loss": 3.5399, |
|
"step": 224500 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.4950445490039045e-05, |
|
"loss": 3.5404, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.4850335368905796e-05, |
|
"loss": 3.5494, |
|
"step": 225500 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.475022524777255e-05, |
|
"loss": 3.5332, |
|
"step": 226000 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.4650115126639305e-05, |
|
"loss": 3.5436, |
|
"step": 226500 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.4550005005506056e-05, |
|
"loss": 3.5414, |
|
"step": 227000 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.444989488437281e-05, |
|
"loss": 3.5463, |
|
"step": 227500 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.4349784763239565e-05, |
|
"loss": 3.5441, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.4249674642106318e-05, |
|
"loss": 3.5448, |
|
"step": 228500 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.414956452097307e-05, |
|
"loss": 3.5414, |
|
"step": 229000 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.4049454399839823e-05, |
|
"loss": 3.5428, |
|
"step": 229500 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.3949344278706578e-05, |
|
"loss": 3.5371, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.3849234157573332e-05, |
|
"loss": 3.545, |
|
"step": 230500 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.3749124036440083e-05, |
|
"loss": 3.5379, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.3649013915306838e-05, |
|
"loss": 3.5405, |
|
"step": 231500 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.3548903794173592e-05, |
|
"loss": 3.5417, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.3448793673040345e-05, |
|
"loss": 3.5416, |
|
"step": 232500 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.3348683551907097e-05, |
|
"loss": 3.543, |
|
"step": 233000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.3248573430773852e-05, |
|
"loss": 3.5473, |
|
"step": 233500 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.3148463309640605e-05, |
|
"loss": 3.5424, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.3048353188507359e-05, |
|
"loss": 3.5367, |
|
"step": 234500 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.2948243067374112e-05, |
|
"loss": 3.5443, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.2848132946240865e-05, |
|
"loss": 3.5426, |
|
"step": 235500 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.2748022825107619e-05, |
|
"loss": 3.5478, |
|
"step": 236000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.2647912703974372e-05, |
|
"loss": 3.5481, |
|
"step": 236500 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.2547802582841126e-05, |
|
"loss": 3.5358, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.2447692461707879e-05, |
|
"loss": 3.5492, |
|
"step": 237500 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.2347582340574632e-05, |
|
"loss": 3.5428, |
|
"step": 238000 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.2247472219441386e-05, |
|
"loss": 3.5412, |
|
"step": 238500 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.2147362098308139e-05, |
|
"loss": 3.5417, |
|
"step": 239000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.2047251977174892e-05, |
|
"loss": 3.5401, |
|
"step": 239500 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.1947141856041647e-05, |
|
"loss": 3.5451, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.18470317349084e-05, |
|
"loss": 3.5367, |
|
"step": 240500 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.1746921613775153e-05, |
|
"loss": 3.5391, |
|
"step": 241000 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.1646811492641906e-05, |
|
"loss": 3.5472, |
|
"step": 241500 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.1546701371508661e-05, |
|
"loss": 3.5384, |
|
"step": 242000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.1446591250375413e-05, |
|
"loss": 3.5407, |
|
"step": 242500 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.1346481129242166e-05, |
|
"loss": 3.5353, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.1246371008108921e-05, |
|
"loss": 3.5362, |
|
"step": 243500 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.1146260886975673e-05, |
|
"loss": 3.5466, |
|
"step": 244000 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.1046150765842426e-05, |
|
"loss": 3.5367, |
|
"step": 244500 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.0946040644709181e-05, |
|
"loss": 3.5408, |
|
"step": 245000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.0845930523575935e-05, |
|
"loss": 3.5473, |
|
"step": 245500 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.0745820402442686e-05, |
|
"loss": 3.548, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 1.064571028130944e-05, |
|
"loss": 3.5384, |
|
"step": 246500 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 1.0545600160176195e-05, |
|
"loss": 3.5336, |
|
"step": 247000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 1.0445490039042948e-05, |
|
"loss": 3.5411, |
|
"step": 247500 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 1.03453799179097e-05, |
|
"loss": 3.5506, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 1.0245269796776455e-05, |
|
"loss": 3.548, |
|
"step": 248500 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 1.0145159675643208e-05, |
|
"loss": 3.5342, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 1.0045049554509962e-05, |
|
"loss": 3.5415, |
|
"step": 249500 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.944939433376713e-06, |
|
"loss": 3.5354, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 9.844829312243468e-06, |
|
"loss": 3.5313, |
|
"step": 250500 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 9.744719191110222e-06, |
|
"loss": 3.5369, |
|
"step": 251000 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 9.644609069976975e-06, |
|
"loss": 3.5452, |
|
"step": 251500 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 9.544498948843729e-06, |
|
"loss": 3.5447, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 9.444388827710482e-06, |
|
"loss": 3.5509, |
|
"step": 252500 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 9.344278706577235e-06, |
|
"loss": 3.538, |
|
"step": 253000 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 9.244168585443989e-06, |
|
"loss": 3.5405, |
|
"step": 253500 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 9.144058464310742e-06, |
|
"loss": 3.5293, |
|
"step": 254000 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 9.043948343177495e-06, |
|
"loss": 3.5478, |
|
"step": 254500 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 8.943838222044249e-06, |
|
"loss": 3.53, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 8.843728100911002e-06, |
|
"loss": 3.54, |
|
"step": 255500 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 8.743617979777756e-06, |
|
"loss": 3.5342, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 8.643507858644509e-06, |
|
"loss": 3.5373, |
|
"step": 256500 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 8.543397737511264e-06, |
|
"loss": 3.5341, |
|
"step": 257000 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 8.443287616378016e-06, |
|
"loss": 3.5384, |
|
"step": 257500 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 8.343177495244769e-06, |
|
"loss": 3.5415, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 8.243067374111522e-06, |
|
"loss": 3.5315, |
|
"step": 258500 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 8.142957252978277e-06, |
|
"loss": 3.5401, |
|
"step": 259000 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 8.042847131845029e-06, |
|
"loss": 3.541, |
|
"step": 259500 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 7.942737010711783e-06, |
|
"loss": 3.5443, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 7.842626889578538e-06, |
|
"loss": 3.5413, |
|
"step": 260500 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 7.742516768445291e-06, |
|
"loss": 3.5302, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 7.642406647312043e-06, |
|
"loss": 3.5429, |
|
"step": 261500 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 7.542296526178797e-06, |
|
"loss": 3.5417, |
|
"step": 262000 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 7.442186405045551e-06, |
|
"loss": 3.5402, |
|
"step": 262500 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 7.342076283912304e-06, |
|
"loss": 3.5407, |
|
"step": 263000 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 7.241966162779057e-06, |
|
"loss": 3.5383, |
|
"step": 263500 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 7.14185604164581e-06, |
|
"loss": 3.5362, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 7.041745920512564e-06, |
|
"loss": 3.5428, |
|
"step": 264500 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 6.941635799379317e-06, |
|
"loss": 3.5374, |
|
"step": 265000 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 6.8415256782460705e-06, |
|
"loss": 3.5414, |
|
"step": 265500 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 6.741415557112825e-06, |
|
"loss": 3.5317, |
|
"step": 266000 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 6.641305435979577e-06, |
|
"loss": 3.54, |
|
"step": 266500 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 6.5411953148463315e-06, |
|
"loss": 3.543, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 6.441085193713084e-06, |
|
"loss": 3.5407, |
|
"step": 267500 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 6.340975072579838e-06, |
|
"loss": 3.5433, |
|
"step": 268000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 6.240864951446592e-06, |
|
"loss": 3.5359, |
|
"step": 268500 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 6.140754830313345e-06, |
|
"loss": 3.5419, |
|
"step": 269000 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 6.040644709180098e-06, |
|
"loss": 3.5488, |
|
"step": 269500 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.940534588046852e-06, |
|
"loss": 3.5352, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 5.840424466913605e-06, |
|
"loss": 3.5328, |
|
"step": 270500 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 5.7403143457803585e-06, |
|
"loss": 3.5397, |
|
"step": 271000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 5.640204224647112e-06, |
|
"loss": 3.529, |
|
"step": 271500 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 5.540094103513866e-06, |
|
"loss": 3.5405, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 5.439983982380619e-06, |
|
"loss": 3.5309, |
|
"step": 272500 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 5.339873861247372e-06, |
|
"loss": 3.5336, |
|
"step": 273000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 5.239763740114125e-06, |
|
"loss": 3.5471, |
|
"step": 273500 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 5.139653618980879e-06, |
|
"loss": 3.5301, |
|
"step": 274000 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 5.039543497847633e-06, |
|
"loss": 3.537, |
|
"step": 274500 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.9394333767143855e-06, |
|
"loss": 3.5421, |
|
"step": 275000 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.83932325558114e-06, |
|
"loss": 3.5365, |
|
"step": 275500 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.739213134447892e-06, |
|
"loss": 3.5313, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.6391030133146465e-06, |
|
"loss": 3.5356, |
|
"step": 276500 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.5389928921814e-06, |
|
"loss": 3.5387, |
|
"step": 277000 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 4.438882771048153e-06, |
|
"loss": 3.5386, |
|
"step": 277500 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 4.338772649914907e-06, |
|
"loss": 3.5426, |
|
"step": 278000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 4.23866252878166e-06, |
|
"loss": 3.5425, |
|
"step": 278500 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 4.138552407648413e-06, |
|
"loss": 3.5396, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 4.038442286515167e-06, |
|
"loss": 3.5389, |
|
"step": 279500 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.93833216538192e-06, |
|
"loss": 3.5308, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.838222044248674e-06, |
|
"loss": 3.5359, |
|
"step": 280500 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.738111923115427e-06, |
|
"loss": 3.5365, |
|
"step": 281000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.6380018019821807e-06, |
|
"loss": 3.5287, |
|
"step": 281500 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.537891680848934e-06, |
|
"loss": 3.5452, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 3.4377815597156874e-06, |
|
"loss": 3.545, |
|
"step": 282500 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 3.337671438582441e-06, |
|
"loss": 3.5322, |
|
"step": 283000 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 3.237561317449194e-06, |
|
"loss": 3.5277, |
|
"step": 283500 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 3.1374511963159475e-06, |
|
"loss": 3.5443, |
|
"step": 284000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 3.0373410751827013e-06, |
|
"loss": 3.5467, |
|
"step": 284500 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.9372309540494547e-06, |
|
"loss": 3.5383, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.837120832916208e-06, |
|
"loss": 3.5327, |
|
"step": 285500 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.7370107117829615e-06, |
|
"loss": 3.534, |
|
"step": 286000 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.6369005906497144e-06, |
|
"loss": 3.5382, |
|
"step": 286500 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.536790469516468e-06, |
|
"loss": 3.5347, |
|
"step": 287000 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.4366803483832216e-06, |
|
"loss": 3.5277, |
|
"step": 287500 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.336570227249975e-06, |
|
"loss": 3.5319, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 2.2364601061167284e-06, |
|
"loss": 3.5436, |
|
"step": 288500 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 2.1363499849834817e-06, |
|
"loss": 3.538, |
|
"step": 289000 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 2.036239863850235e-06, |
|
"loss": 3.5365, |
|
"step": 289500 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.936129742716989e-06, |
|
"loss": 3.5385, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.836019621583742e-06, |
|
"loss": 3.5317, |
|
"step": 290500 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.7359095004504957e-06, |
|
"loss": 3.536, |
|
"step": 291000 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.635799379317249e-06, |
|
"loss": 3.5383, |
|
"step": 291500 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.5356892581840024e-06, |
|
"loss": 3.5316, |
|
"step": 292000 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.435579137050756e-06, |
|
"loss": 3.5352, |
|
"step": 292500 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.3354690159175094e-06, |
|
"loss": 3.5296, |
|
"step": 293000 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.2353588947842625e-06, |
|
"loss": 3.5341, |
|
"step": 293500 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.1352487736510161e-06, |
|
"loss": 3.5336, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 1.0351386525177695e-06, |
|
"loss": 3.5404, |
|
"step": 294500 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 9.35028531384523e-07, |
|
"loss": 3.5433, |
|
"step": 295000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 8.349184102512765e-07, |
|
"loss": 3.5247, |
|
"step": 295500 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 7.348082891180298e-07, |
|
"loss": 3.5389, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 6.346981679847833e-07, |
|
"loss": 3.539, |
|
"step": 296500 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 5.345880468515367e-07, |
|
"loss": 3.5283, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 4.3447792571829013e-07, |
|
"loss": 3.5397, |
|
"step": 297500 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 3.3436780458504356e-07, |
|
"loss": 3.5438, |
|
"step": 298000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 2.34257683451797e-07, |
|
"loss": 3.5265, |
|
"step": 298500 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.341475623185504e-07, |
|
"loss": 3.5346, |
|
"step": 299000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 3.403744118530384e-08, |
|
"loss": 3.5366, |
|
"step": 299500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 299670, |
|
"total_flos": 4.00615718456918e+19, |
|
"train_loss": 1.773998625050055, |
|
"train_runtime": 174345.3711, |
|
"train_samples_per_second": 55.003, |
|
"train_steps_per_second": 1.719 |
|
} |
|
], |
|
"max_steps": 299670, |
|
"num_train_epochs": 3, |
|
"total_flos": 4.00615718456918e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|