mbart-summarization-ilpost / trainer_state.json
artelabsuper
train
e10840f
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.0,
"global_step": 140804,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 4.9822448225902675e-05,
"loss": 3.154,
"step": 500
},
{
"epoch": 0.03,
"learning_rate": 4.9644896451805353e-05,
"loss": 2.9047,
"step": 1000
},
{
"epoch": 0.04,
"learning_rate": 4.946734467770802e-05,
"loss": 2.8071,
"step": 1500
},
{
"epoch": 0.06,
"learning_rate": 4.928979290361069e-05,
"loss": 2.9013,
"step": 2000
},
{
"epoch": 0.07,
"learning_rate": 4.911224112951337e-05,
"loss": 2.7409,
"step": 2500
},
{
"epoch": 0.09,
"learning_rate": 4.893468935541604e-05,
"loss": 7.1257,
"step": 3000
},
{
"epoch": 0.1,
"learning_rate": 4.8757137581318714e-05,
"loss": 2.904,
"step": 3500
},
{
"epoch": 0.11,
"learning_rate": 4.8579585807221386e-05,
"loss": 2.823,
"step": 4000
},
{
"epoch": 0.13,
"learning_rate": 4.840203403312406e-05,
"loss": 2.7295,
"step": 4500
},
{
"epoch": 0.14,
"learning_rate": 4.822448225902673e-05,
"loss": 2.7083,
"step": 5000
},
{
"epoch": 0.16,
"learning_rate": 4.804693048492941e-05,
"loss": 2.6503,
"step": 5500
},
{
"epoch": 0.17,
"learning_rate": 4.786937871083208e-05,
"loss": 2.6333,
"step": 6000
},
{
"epoch": 0.18,
"learning_rate": 4.7691826936734754e-05,
"loss": 2.6817,
"step": 6500
},
{
"epoch": 0.2,
"learning_rate": 4.7514275162637426e-05,
"loss": 2.5641,
"step": 7000
},
{
"epoch": 0.21,
"learning_rate": 4.73367233885401e-05,
"loss": 2.5646,
"step": 7500
},
{
"epoch": 0.23,
"learning_rate": 4.715917161444278e-05,
"loss": 2.5781,
"step": 8000
},
{
"epoch": 0.24,
"learning_rate": 4.698161984034545e-05,
"loss": 2.5461,
"step": 8500
},
{
"epoch": 0.26,
"learning_rate": 4.680406806624812e-05,
"loss": 2.5121,
"step": 9000
},
{
"epoch": 0.27,
"learning_rate": 4.662651629215079e-05,
"loss": 2.5666,
"step": 9500
},
{
"epoch": 0.28,
"learning_rate": 4.6448964518053465e-05,
"loss": 2.5357,
"step": 10000
},
{
"epoch": 0.3,
"learning_rate": 4.627141274395614e-05,
"loss": 2.4651,
"step": 10500
},
{
"epoch": 0.31,
"learning_rate": 4.6093860969858816e-05,
"loss": 2.5033,
"step": 11000
},
{
"epoch": 0.33,
"learning_rate": 4.591630919576149e-05,
"loss": 2.5227,
"step": 11500
},
{
"epoch": 0.34,
"learning_rate": 4.573875742166416e-05,
"loss": 2.4618,
"step": 12000
},
{
"epoch": 0.36,
"learning_rate": 4.556120564756683e-05,
"loss": 2.4844,
"step": 12500
},
{
"epoch": 0.37,
"learning_rate": 4.5383653873469505e-05,
"loss": 2.5515,
"step": 13000
},
{
"epoch": 0.38,
"learning_rate": 4.520610209937218e-05,
"loss": 2.5156,
"step": 13500
},
{
"epoch": 0.4,
"learning_rate": 4.5028550325274856e-05,
"loss": 2.4882,
"step": 14000
},
{
"epoch": 0.41,
"learning_rate": 4.485099855117753e-05,
"loss": 2.4225,
"step": 14500
},
{
"epoch": 0.43,
"learning_rate": 4.46734467770802e-05,
"loss": 2.4459,
"step": 15000
},
{
"epoch": 0.44,
"learning_rate": 4.449589500298287e-05,
"loss": 2.5628,
"step": 15500
},
{
"epoch": 0.45,
"learning_rate": 4.4318343228885544e-05,
"loss": 2.4405,
"step": 16000
},
{
"epoch": 0.47,
"learning_rate": 4.4140791454788216e-05,
"loss": 2.5211,
"step": 16500
},
{
"epoch": 0.48,
"learning_rate": 4.3963239680690895e-05,
"loss": 2.5868,
"step": 17000
},
{
"epoch": 0.5,
"learning_rate": 4.378568790659357e-05,
"loss": 2.4548,
"step": 17500
},
{
"epoch": 0.51,
"learning_rate": 4.360813613249623e-05,
"loss": 2.4125,
"step": 18000
},
{
"epoch": 0.53,
"learning_rate": 4.343058435839891e-05,
"loss": 2.3719,
"step": 18500
},
{
"epoch": 0.54,
"learning_rate": 4.3253032584301584e-05,
"loss": 2.4151,
"step": 19000
},
{
"epoch": 0.55,
"learning_rate": 4.3075480810204256e-05,
"loss": 2.4405,
"step": 19500
},
{
"epoch": 0.57,
"learning_rate": 4.2897929036106935e-05,
"loss": 2.4156,
"step": 20000
},
{
"epoch": 0.58,
"learning_rate": 4.27203772620096e-05,
"loss": 2.4121,
"step": 20500
},
{
"epoch": 0.6,
"learning_rate": 4.254282548791227e-05,
"loss": 2.3612,
"step": 21000
},
{
"epoch": 0.61,
"learning_rate": 4.236527371381495e-05,
"loss": 2.3231,
"step": 21500
},
{
"epoch": 0.62,
"learning_rate": 4.2187721939717623e-05,
"loss": 2.3604,
"step": 22000
},
{
"epoch": 0.64,
"learning_rate": 4.20101701656203e-05,
"loss": 2.3752,
"step": 22500
},
{
"epoch": 0.65,
"learning_rate": 4.183261839152297e-05,
"loss": 2.3747,
"step": 23000
},
{
"epoch": 0.67,
"learning_rate": 4.165506661742564e-05,
"loss": 2.3668,
"step": 23500
},
{
"epoch": 0.68,
"learning_rate": 4.147751484332832e-05,
"loss": 2.4354,
"step": 24000
},
{
"epoch": 0.7,
"learning_rate": 4.129996306923099e-05,
"loss": 2.3519,
"step": 24500
},
{
"epoch": 0.71,
"learning_rate": 4.112241129513366e-05,
"loss": 2.3312,
"step": 25000
},
{
"epoch": 0.72,
"learning_rate": 4.0944859521036335e-05,
"loss": 2.3092,
"step": 25500
},
{
"epoch": 0.74,
"learning_rate": 4.076730774693901e-05,
"loss": 2.378,
"step": 26000
},
{
"epoch": 0.75,
"learning_rate": 4.058975597284168e-05,
"loss": 2.371,
"step": 26500
},
{
"epoch": 0.77,
"learning_rate": 4.041220419874436e-05,
"loss": 2.2957,
"step": 27000
},
{
"epoch": 0.78,
"learning_rate": 4.023465242464703e-05,
"loss": 2.3008,
"step": 27500
},
{
"epoch": 0.8,
"learning_rate": 4.00571006505497e-05,
"loss": 2.3263,
"step": 28000
},
{
"epoch": 0.81,
"learning_rate": 3.9879548876452375e-05,
"loss": 2.2668,
"step": 28500
},
{
"epoch": 0.82,
"learning_rate": 3.970199710235505e-05,
"loss": 2.3133,
"step": 29000
},
{
"epoch": 0.84,
"learning_rate": 3.952444532825772e-05,
"loss": 2.2935,
"step": 29500
},
{
"epoch": 0.85,
"learning_rate": 3.93468935541604e-05,
"loss": 2.2905,
"step": 30000
},
{
"epoch": 0.87,
"learning_rate": 3.916934178006307e-05,
"loss": 2.2517,
"step": 30500
},
{
"epoch": 0.88,
"learning_rate": 3.899179000596574e-05,
"loss": 2.1286,
"step": 31000
},
{
"epoch": 0.89,
"learning_rate": 3.8814238231868414e-05,
"loss": 2.2542,
"step": 31500
},
{
"epoch": 0.91,
"learning_rate": 3.8636686457771086e-05,
"loss": 2.272,
"step": 32000
},
{
"epoch": 0.92,
"learning_rate": 3.845913468367376e-05,
"loss": 2.3143,
"step": 32500
},
{
"epoch": 0.94,
"learning_rate": 3.828158290957644e-05,
"loss": 2.2873,
"step": 33000
},
{
"epoch": 0.95,
"learning_rate": 3.810403113547911e-05,
"loss": 2.2756,
"step": 33500
},
{
"epoch": 0.97,
"learning_rate": 3.7926479361381775e-05,
"loss": 2.3062,
"step": 34000
},
{
"epoch": 0.98,
"learning_rate": 3.7748927587284454e-05,
"loss": 2.27,
"step": 34500
},
{
"epoch": 0.99,
"learning_rate": 3.7571375813187126e-05,
"loss": 2.1815,
"step": 35000
},
{
"epoch": 1.01,
"learning_rate": 3.7393824039089805e-05,
"loss": 2.001,
"step": 35500
},
{
"epoch": 1.02,
"learning_rate": 3.721627226499248e-05,
"loss": 1.7794,
"step": 36000
},
{
"epoch": 1.04,
"learning_rate": 3.703872049089515e-05,
"loss": 1.782,
"step": 36500
},
{
"epoch": 1.05,
"learning_rate": 3.686116871679782e-05,
"loss": 1.7742,
"step": 37000
},
{
"epoch": 1.07,
"learning_rate": 3.668361694270049e-05,
"loss": 1.8578,
"step": 37500
},
{
"epoch": 1.08,
"learning_rate": 3.6506065168603165e-05,
"loss": 1.7768,
"step": 38000
},
{
"epoch": 1.09,
"learning_rate": 3.6328513394505844e-05,
"loss": 1.8162,
"step": 38500
},
{
"epoch": 1.11,
"learning_rate": 3.6150961620408516e-05,
"loss": 1.7432,
"step": 39000
},
{
"epoch": 1.12,
"learning_rate": 3.597340984631118e-05,
"loss": 1.7879,
"step": 39500
},
{
"epoch": 1.14,
"learning_rate": 3.579585807221386e-05,
"loss": 1.8667,
"step": 40000
},
{
"epoch": 1.15,
"learning_rate": 3.561830629811653e-05,
"loss": 1.8724,
"step": 40500
},
{
"epoch": 1.16,
"learning_rate": 3.5440754524019205e-05,
"loss": 1.8405,
"step": 41000
},
{
"epoch": 1.18,
"learning_rate": 3.5263202749921884e-05,
"loss": 1.801,
"step": 41500
},
{
"epoch": 1.19,
"learning_rate": 3.508565097582455e-05,
"loss": 1.8085,
"step": 42000
},
{
"epoch": 1.21,
"learning_rate": 3.490809920172722e-05,
"loss": 1.8483,
"step": 42500
},
{
"epoch": 1.22,
"learning_rate": 3.47305474276299e-05,
"loss": 1.7794,
"step": 43000
},
{
"epoch": 1.24,
"learning_rate": 3.455299565353257e-05,
"loss": 1.8514,
"step": 43500
},
{
"epoch": 1.25,
"learning_rate": 3.4375443879435245e-05,
"loss": 1.8237,
"step": 44000
},
{
"epoch": 1.26,
"learning_rate": 3.419789210533792e-05,
"loss": 1.7449,
"step": 44500
},
{
"epoch": 1.28,
"learning_rate": 3.402034033124059e-05,
"loss": 1.8584,
"step": 45000
},
{
"epoch": 1.29,
"learning_rate": 3.384278855714326e-05,
"loss": 1.7689,
"step": 45500
},
{
"epoch": 1.31,
"learning_rate": 3.366523678304594e-05,
"loss": 1.743,
"step": 46000
},
{
"epoch": 1.32,
"learning_rate": 3.348768500894861e-05,
"loss": 1.805,
"step": 46500
},
{
"epoch": 1.34,
"learning_rate": 3.3310133234851284e-05,
"loss": 1.7769,
"step": 47000
},
{
"epoch": 1.35,
"learning_rate": 3.3132581460753956e-05,
"loss": 1.8575,
"step": 47500
},
{
"epoch": 1.36,
"learning_rate": 3.295502968665663e-05,
"loss": 1.8076,
"step": 48000
},
{
"epoch": 1.38,
"learning_rate": 3.27774779125593e-05,
"loss": 1.7576,
"step": 48500
},
{
"epoch": 1.39,
"learning_rate": 3.259992613846198e-05,
"loss": 1.7733,
"step": 49000
},
{
"epoch": 1.41,
"learning_rate": 3.242237436436465e-05,
"loss": 1.8068,
"step": 49500
},
{
"epoch": 1.42,
"learning_rate": 3.2244822590267324e-05,
"loss": 1.771,
"step": 50000
},
{
"epoch": 1.43,
"learning_rate": 3.2067270816169996e-05,
"loss": 1.7543,
"step": 50500
},
{
"epoch": 1.45,
"learning_rate": 3.188971904207267e-05,
"loss": 1.757,
"step": 51000
},
{
"epoch": 1.46,
"learning_rate": 3.171216726797535e-05,
"loss": 1.8191,
"step": 51500
},
{
"epoch": 1.48,
"learning_rate": 3.153461549387802e-05,
"loss": 1.7823,
"step": 52000
},
{
"epoch": 1.49,
"learning_rate": 3.135706371978069e-05,
"loss": 1.7887,
"step": 52500
},
{
"epoch": 1.51,
"learning_rate": 3.117951194568336e-05,
"loss": 1.8028,
"step": 53000
},
{
"epoch": 1.52,
"learning_rate": 3.1001960171586035e-05,
"loss": 1.7986,
"step": 53500
},
{
"epoch": 1.53,
"learning_rate": 3.082440839748871e-05,
"loss": 1.8395,
"step": 54000
},
{
"epoch": 1.55,
"learning_rate": 3.0646856623391386e-05,
"loss": 1.7667,
"step": 54500
},
{
"epoch": 1.56,
"learning_rate": 3.0469304849294055e-05,
"loss": 1.7733,
"step": 55000
},
{
"epoch": 1.58,
"learning_rate": 3.0291753075196727e-05,
"loss": 1.8444,
"step": 55500
},
{
"epoch": 1.59,
"learning_rate": 3.0114201301099403e-05,
"loss": 1.7362,
"step": 56000
},
{
"epoch": 1.61,
"learning_rate": 2.9936649527002075e-05,
"loss": 1.783,
"step": 56500
},
{
"epoch": 1.62,
"learning_rate": 2.9759097752904747e-05,
"loss": 1.7838,
"step": 57000
},
{
"epoch": 1.63,
"learning_rate": 2.9581545978807422e-05,
"loss": 1.8201,
"step": 57500
},
{
"epoch": 1.65,
"learning_rate": 2.9403994204710095e-05,
"loss": 1.7784,
"step": 58000
},
{
"epoch": 1.66,
"learning_rate": 2.9226442430612767e-05,
"loss": 1.7708,
"step": 58500
},
{
"epoch": 1.68,
"learning_rate": 2.9048890656515442e-05,
"loss": 1.7986,
"step": 59000
},
{
"epoch": 1.69,
"learning_rate": 2.8871338882418114e-05,
"loss": 1.7504,
"step": 59500
},
{
"epoch": 1.7,
"learning_rate": 2.8693787108320786e-05,
"loss": 1.7938,
"step": 60000
},
{
"epoch": 1.72,
"learning_rate": 2.8516235334223462e-05,
"loss": 1.8189,
"step": 60500
},
{
"epoch": 1.73,
"learning_rate": 2.8338683560126134e-05,
"loss": 1.8234,
"step": 61000
},
{
"epoch": 1.75,
"learning_rate": 2.8161131786028806e-05,
"loss": 1.7313,
"step": 61500
},
{
"epoch": 1.76,
"learning_rate": 2.7983580011931482e-05,
"loss": 1.7483,
"step": 62000
},
{
"epoch": 1.78,
"learning_rate": 2.7806028237834154e-05,
"loss": 1.7735,
"step": 62500
},
{
"epoch": 1.79,
"learning_rate": 2.7628476463736823e-05,
"loss": 1.7133,
"step": 63000
},
{
"epoch": 1.8,
"learning_rate": 2.74509246896395e-05,
"loss": 1.7454,
"step": 63500
},
{
"epoch": 1.82,
"learning_rate": 2.7273372915542174e-05,
"loss": 1.7284,
"step": 64000
},
{
"epoch": 1.83,
"learning_rate": 2.709582114144485e-05,
"loss": 1.7353,
"step": 64500
},
{
"epoch": 1.85,
"learning_rate": 2.691826936734752e-05,
"loss": 1.751,
"step": 65000
},
{
"epoch": 1.86,
"learning_rate": 2.6740717593250193e-05,
"loss": 1.751,
"step": 65500
},
{
"epoch": 1.87,
"learning_rate": 2.656316581915287e-05,
"loss": 1.7628,
"step": 66000
},
{
"epoch": 1.89,
"learning_rate": 2.638561404505554e-05,
"loss": 1.732,
"step": 66500
},
{
"epoch": 1.9,
"learning_rate": 2.620806227095821e-05,
"loss": 1.7149,
"step": 67000
},
{
"epoch": 1.92,
"learning_rate": 2.603051049686089e-05,
"loss": 1.7349,
"step": 67500
},
{
"epoch": 1.93,
"learning_rate": 2.585295872276356e-05,
"loss": 1.7029,
"step": 68000
},
{
"epoch": 1.95,
"learning_rate": 2.567540694866623e-05,
"loss": 1.7073,
"step": 68500
},
{
"epoch": 1.96,
"learning_rate": 2.549785517456891e-05,
"loss": 1.7113,
"step": 69000
},
{
"epoch": 1.97,
"learning_rate": 2.5320303400471577e-05,
"loss": 1.7561,
"step": 69500
},
{
"epoch": 1.99,
"learning_rate": 2.514275162637425e-05,
"loss": 1.707,
"step": 70000
},
{
"epoch": 2.0,
"learning_rate": 2.4965199852276925e-05,
"loss": 1.6343,
"step": 70500
},
{
"epoch": 2.02,
"learning_rate": 2.4787648078179597e-05,
"loss": 1.2461,
"step": 71000
},
{
"epoch": 2.03,
"learning_rate": 2.4610096304082273e-05,
"loss": 1.1708,
"step": 71500
},
{
"epoch": 2.05,
"learning_rate": 2.4432544529984945e-05,
"loss": 1.2014,
"step": 72000
},
{
"epoch": 2.06,
"learning_rate": 2.4254992755887617e-05,
"loss": 1.223,
"step": 72500
},
{
"epoch": 2.07,
"learning_rate": 2.4077440981790292e-05,
"loss": 1.2299,
"step": 73000
},
{
"epoch": 2.09,
"learning_rate": 2.3899889207692964e-05,
"loss": 1.1634,
"step": 73500
},
{
"epoch": 2.1,
"learning_rate": 2.3722337433595637e-05,
"loss": 1.1944,
"step": 74000
},
{
"epoch": 2.12,
"learning_rate": 2.3544785659498312e-05,
"loss": 1.2264,
"step": 74500
},
{
"epoch": 2.13,
"learning_rate": 2.3367233885400984e-05,
"loss": 1.2196,
"step": 75000
},
{
"epoch": 2.14,
"learning_rate": 2.3189682111303656e-05,
"loss": 1.209,
"step": 75500
},
{
"epoch": 2.16,
"learning_rate": 2.3012130337206332e-05,
"loss": 1.1837,
"step": 76000
},
{
"epoch": 2.17,
"learning_rate": 2.2834578563109004e-05,
"loss": 1.1855,
"step": 76500
},
{
"epoch": 2.19,
"learning_rate": 2.2657026789011676e-05,
"loss": 1.236,
"step": 77000
},
{
"epoch": 2.2,
"learning_rate": 2.247947501491435e-05,
"loss": 1.2629,
"step": 77500
},
{
"epoch": 2.22,
"learning_rate": 2.2301923240817024e-05,
"loss": 1.2625,
"step": 78000
},
{
"epoch": 2.23,
"learning_rate": 2.2124371466719696e-05,
"loss": 1.2181,
"step": 78500
},
{
"epoch": 2.24,
"learning_rate": 2.1946819692622368e-05,
"loss": 1.2551,
"step": 79000
},
{
"epoch": 2.26,
"learning_rate": 2.1769267918525044e-05,
"loss": 1.2755,
"step": 79500
},
{
"epoch": 2.27,
"learning_rate": 2.1591716144427716e-05,
"loss": 1.2594,
"step": 80000
},
{
"epoch": 2.29,
"learning_rate": 2.1414164370330388e-05,
"loss": 1.2469,
"step": 80500
},
{
"epoch": 2.3,
"learning_rate": 2.1236612596233063e-05,
"loss": 1.1807,
"step": 81000
},
{
"epoch": 2.32,
"learning_rate": 2.105906082213574e-05,
"loss": 1.2618,
"step": 81500
},
{
"epoch": 2.33,
"learning_rate": 2.0881509048038408e-05,
"loss": 1.2172,
"step": 82000
},
{
"epoch": 2.34,
"learning_rate": 2.0703957273941083e-05,
"loss": 1.2428,
"step": 82500
},
{
"epoch": 2.36,
"learning_rate": 2.0526405499843755e-05,
"loss": 1.2209,
"step": 83000
},
{
"epoch": 2.37,
"learning_rate": 2.0348853725746427e-05,
"loss": 1.2078,
"step": 83500
},
{
"epoch": 2.39,
"learning_rate": 2.0171301951649103e-05,
"loss": 1.23,
"step": 84000
},
{
"epoch": 2.4,
"learning_rate": 1.9993750177551775e-05,
"loss": 1.2381,
"step": 84500
},
{
"epoch": 2.41,
"learning_rate": 1.9816198403454447e-05,
"loss": 1.224,
"step": 85000
},
{
"epoch": 2.43,
"learning_rate": 1.9638646629357123e-05,
"loss": 1.2287,
"step": 85500
},
{
"epoch": 2.44,
"learning_rate": 1.9461094855259795e-05,
"loss": 1.1941,
"step": 86000
},
{
"epoch": 2.46,
"learning_rate": 1.9283543081162467e-05,
"loss": 1.222,
"step": 86500
},
{
"epoch": 2.47,
"learning_rate": 1.9105991307065142e-05,
"loss": 1.2172,
"step": 87000
},
{
"epoch": 2.49,
"learning_rate": 1.8928439532967814e-05,
"loss": 1.2138,
"step": 87500
},
{
"epoch": 2.5,
"learning_rate": 1.8750887758870487e-05,
"loss": 1.229,
"step": 88000
},
{
"epoch": 2.51,
"learning_rate": 1.857333598477316e-05,
"loss": 1.2155,
"step": 88500
},
{
"epoch": 2.53,
"learning_rate": 1.8395784210675834e-05,
"loss": 1.1969,
"step": 89000
},
{
"epoch": 2.54,
"learning_rate": 1.821823243657851e-05,
"loss": 1.1926,
"step": 89500
},
{
"epoch": 2.56,
"learning_rate": 1.804068066248118e-05,
"loss": 1.2056,
"step": 90000
},
{
"epoch": 2.57,
"learning_rate": 1.7863128888383854e-05,
"loss": 1.1989,
"step": 90500
},
{
"epoch": 2.59,
"learning_rate": 1.7685577114286526e-05,
"loss": 1.2367,
"step": 91000
},
{
"epoch": 2.6,
"learning_rate": 1.7508025340189198e-05,
"loss": 1.2222,
"step": 91500
},
{
"epoch": 2.61,
"learning_rate": 1.7330473566091874e-05,
"loss": 1.161,
"step": 92000
},
{
"epoch": 2.63,
"learning_rate": 1.7152921791994546e-05,
"loss": 1.2405,
"step": 92500
},
{
"epoch": 2.64,
"learning_rate": 1.6975370017897218e-05,
"loss": 1.2184,
"step": 93000
},
{
"epoch": 2.66,
"learning_rate": 1.6797818243799894e-05,
"loss": 1.2103,
"step": 93500
},
{
"epoch": 2.67,
"learning_rate": 1.6620266469702566e-05,
"loss": 1.2416,
"step": 94000
},
{
"epoch": 2.68,
"learning_rate": 1.6442714695605238e-05,
"loss": 1.2149,
"step": 94500
},
{
"epoch": 2.7,
"learning_rate": 1.6265162921507913e-05,
"loss": 1.1707,
"step": 95000
},
{
"epoch": 2.71,
"learning_rate": 1.6087611147410585e-05,
"loss": 1.2351,
"step": 95500
},
{
"epoch": 2.73,
"learning_rate": 1.591005937331326e-05,
"loss": 1.2179,
"step": 96000
},
{
"epoch": 2.74,
"learning_rate": 1.5732507599215933e-05,
"loss": 1.201,
"step": 96500
},
{
"epoch": 2.76,
"learning_rate": 1.5554955825118605e-05,
"loss": 1.2169,
"step": 97000
},
{
"epoch": 2.77,
"learning_rate": 1.537740405102128e-05,
"loss": 1.1485,
"step": 97500
},
{
"epoch": 2.78,
"learning_rate": 1.5199852276923951e-05,
"loss": 1.166,
"step": 98000
},
{
"epoch": 2.8,
"learning_rate": 1.5022300502826625e-05,
"loss": 1.1665,
"step": 98500
},
{
"epoch": 2.81,
"learning_rate": 1.4844748728729299e-05,
"loss": 1.184,
"step": 99000
},
{
"epoch": 2.83,
"learning_rate": 1.4667196954631971e-05,
"loss": 1.2234,
"step": 99500
},
{
"epoch": 2.84,
"learning_rate": 1.4489645180534645e-05,
"loss": 1.1631,
"step": 100000
},
{
"epoch": 2.86,
"learning_rate": 1.4312093406437319e-05,
"loss": 1.1708,
"step": 100500
},
{
"epoch": 2.87,
"learning_rate": 1.4134541632339989e-05,
"loss": 1.1783,
"step": 101000
},
{
"epoch": 2.88,
"learning_rate": 1.3956989858242665e-05,
"loss": 1.2218,
"step": 101500
},
{
"epoch": 2.9,
"learning_rate": 1.3779438084145338e-05,
"loss": 1.1166,
"step": 102000
},
{
"epoch": 2.91,
"learning_rate": 1.3601886310048009e-05,
"loss": 1.1316,
"step": 102500
},
{
"epoch": 2.93,
"learning_rate": 1.3424334535950683e-05,
"loss": 1.1974,
"step": 103000
},
{
"epoch": 2.94,
"learning_rate": 1.3246782761853358e-05,
"loss": 1.2132,
"step": 103500
},
{
"epoch": 2.95,
"learning_rate": 1.3069230987756032e-05,
"loss": 1.1827,
"step": 104000
},
{
"epoch": 2.97,
"learning_rate": 1.2891679213658702e-05,
"loss": 1.221,
"step": 104500
},
{
"epoch": 2.98,
"learning_rate": 1.2714127439561376e-05,
"loss": 1.1951,
"step": 105000
},
{
"epoch": 3.0,
"learning_rate": 1.2536575665464052e-05,
"loss": 1.1593,
"step": 105500
},
{
"epoch": 3.01,
"learning_rate": 1.2359023891366724e-05,
"loss": 0.8358,
"step": 106000
},
{
"epoch": 3.03,
"learning_rate": 1.2181472117269396e-05,
"loss": 0.7614,
"step": 106500
},
{
"epoch": 3.04,
"learning_rate": 1.2003920343172068e-05,
"loss": 0.7304,
"step": 107000
},
{
"epoch": 3.05,
"learning_rate": 1.1826368569074744e-05,
"loss": 0.7472,
"step": 107500
},
{
"epoch": 3.07,
"learning_rate": 1.1648816794977416e-05,
"loss": 0.7296,
"step": 108000
},
{
"epoch": 3.08,
"learning_rate": 1.147126502088009e-05,
"loss": 0.7666,
"step": 108500
},
{
"epoch": 3.1,
"learning_rate": 1.1293713246782762e-05,
"loss": 0.7406,
"step": 109000
},
{
"epoch": 3.11,
"learning_rate": 1.1116161472685436e-05,
"loss": 0.7183,
"step": 109500
},
{
"epoch": 3.12,
"learning_rate": 1.093860969858811e-05,
"loss": 0.7044,
"step": 110000
},
{
"epoch": 3.14,
"learning_rate": 1.0761057924490782e-05,
"loss": 0.7215,
"step": 110500
},
{
"epoch": 3.15,
"learning_rate": 1.0583506150393455e-05,
"loss": 0.6939,
"step": 111000
},
{
"epoch": 3.17,
"learning_rate": 1.0405954376296129e-05,
"loss": 0.7412,
"step": 111500
},
{
"epoch": 3.18,
"learning_rate": 1.0228402602198801e-05,
"loss": 0.7525,
"step": 112000
},
{
"epoch": 3.2,
"learning_rate": 1.0050850828101475e-05,
"loss": 0.7109,
"step": 112500
},
{
"epoch": 3.21,
"learning_rate": 9.873299054004149e-06,
"loss": 0.7163,
"step": 113000
},
{
"epoch": 3.22,
"learning_rate": 9.695747279906821e-06,
"loss": 0.7131,
"step": 113500
},
{
"epoch": 3.24,
"learning_rate": 9.518195505809495e-06,
"loss": 0.7461,
"step": 114000
},
{
"epoch": 3.25,
"learning_rate": 9.340643731712167e-06,
"loss": 0.6805,
"step": 114500
},
{
"epoch": 3.27,
"learning_rate": 9.16309195761484e-06,
"loss": 0.7688,
"step": 115000
},
{
"epoch": 3.28,
"learning_rate": 8.985540183517515e-06,
"loss": 0.7359,
"step": 115500
},
{
"epoch": 3.3,
"learning_rate": 8.807988409420187e-06,
"loss": 0.7284,
"step": 116000
},
{
"epoch": 3.31,
"learning_rate": 8.63043663532286e-06,
"loss": 0.6892,
"step": 116500
},
{
"epoch": 3.32,
"learning_rate": 8.452884861225534e-06,
"loss": 0.6987,
"step": 117000
},
{
"epoch": 3.34,
"learning_rate": 8.275333087128207e-06,
"loss": 0.7161,
"step": 117500
},
{
"epoch": 3.35,
"learning_rate": 8.09778131303088e-06,
"loss": 0.7185,
"step": 118000
},
{
"epoch": 3.37,
"learning_rate": 7.920229538933552e-06,
"loss": 0.7227,
"step": 118500
},
{
"epoch": 3.38,
"learning_rate": 7.742677764836228e-06,
"loss": 0.6908,
"step": 119000
},
{
"epoch": 3.39,
"learning_rate": 7.5651259907389e-06,
"loss": 0.7278,
"step": 119500
},
{
"epoch": 3.41,
"learning_rate": 7.387574216641572e-06,
"loss": 0.7548,
"step": 120000
},
{
"epoch": 3.42,
"learning_rate": 7.210022442544247e-06,
"loss": 0.7345,
"step": 120500
},
{
"epoch": 3.44,
"learning_rate": 7.032470668446919e-06,
"loss": 0.6865,
"step": 121000
},
{
"epoch": 3.45,
"learning_rate": 6.854918894349592e-06,
"loss": 0.7485,
"step": 121500
},
{
"epoch": 3.47,
"learning_rate": 6.677367120252266e-06,
"loss": 0.7195,
"step": 122000
},
{
"epoch": 3.48,
"learning_rate": 6.499815346154939e-06,
"loss": 0.7444,
"step": 122500
},
{
"epoch": 3.49,
"learning_rate": 6.322263572057613e-06,
"loss": 0.736,
"step": 123000
},
{
"epoch": 3.51,
"learning_rate": 6.144711797960286e-06,
"loss": 0.7284,
"step": 123500
},
{
"epoch": 3.52,
"learning_rate": 5.967160023862959e-06,
"loss": 0.7029,
"step": 124000
},
{
"epoch": 3.54,
"learning_rate": 5.7896082497656316e-06,
"loss": 0.7262,
"step": 124500
},
{
"epoch": 3.55,
"learning_rate": 5.612056475668305e-06,
"loss": 0.7153,
"step": 125000
},
{
"epoch": 3.57,
"learning_rate": 5.434504701570978e-06,
"loss": 0.7528,
"step": 125500
},
{
"epoch": 3.58,
"learning_rate": 5.256952927473651e-06,
"loss": 0.7051,
"step": 126000
},
{
"epoch": 3.59,
"learning_rate": 5.079401153376324e-06,
"loss": 0.7278,
"step": 126500
},
{
"epoch": 3.61,
"learning_rate": 4.901849379278998e-06,
"loss": 0.7206,
"step": 127000
},
{
"epoch": 3.62,
"learning_rate": 4.724297605181671e-06,
"loss": 0.7086,
"step": 127500
},
{
"epoch": 3.64,
"learning_rate": 4.546745831084345e-06,
"loss": 0.7092,
"step": 128000
},
{
"epoch": 3.65,
"learning_rate": 4.369194056987018e-06,
"loss": 0.7109,
"step": 128500
},
{
"epoch": 3.66,
"learning_rate": 4.191642282889691e-06,
"loss": 0.7321,
"step": 129000
},
{
"epoch": 3.68,
"learning_rate": 4.014090508792364e-06,
"loss": 0.7008,
"step": 129500
},
{
"epoch": 3.69,
"learning_rate": 3.836538734695038e-06,
"loss": 0.6671,
"step": 130000
},
{
"epoch": 3.71,
"learning_rate": 3.6589869605977107e-06,
"loss": 0.7047,
"step": 130500
},
{
"epoch": 3.72,
"learning_rate": 3.4814351865003836e-06,
"loss": 0.7033,
"step": 131000
},
{
"epoch": 3.74,
"learning_rate": 3.303883412403057e-06,
"loss": 0.7384,
"step": 131500
},
{
"epoch": 3.75,
"learning_rate": 3.12633163830573e-06,
"loss": 0.7303,
"step": 132000
},
{
"epoch": 3.76,
"learning_rate": 2.9487798642084034e-06,
"loss": 0.7056,
"step": 132500
},
{
"epoch": 3.78,
"learning_rate": 2.771228090111077e-06,
"loss": 0.7186,
"step": 133000
},
{
"epoch": 3.79,
"learning_rate": 2.5936763160137498e-06,
"loss": 0.7086,
"step": 133500
},
{
"epoch": 3.81,
"learning_rate": 2.416124541916423e-06,
"loss": 0.7213,
"step": 134000
},
{
"epoch": 3.82,
"learning_rate": 2.238572767819096e-06,
"loss": 0.6589,
"step": 134500
},
{
"epoch": 3.84,
"learning_rate": 2.0610209937217696e-06,
"loss": 0.7237,
"step": 135000
},
{
"epoch": 3.85,
"learning_rate": 1.8834692196244425e-06,
"loss": 0.711,
"step": 135500
},
{
"epoch": 3.86,
"learning_rate": 1.705917445527116e-06,
"loss": 0.7029,
"step": 136000
},
{
"epoch": 3.88,
"learning_rate": 1.5283656714297891e-06,
"loss": 0.7168,
"step": 136500
},
{
"epoch": 3.89,
"learning_rate": 1.3508138973324623e-06,
"loss": 0.6644,
"step": 137000
},
{
"epoch": 3.91,
"learning_rate": 1.1732621232351355e-06,
"loss": 0.6715,
"step": 137500
},
{
"epoch": 3.92,
"learning_rate": 9.957103491378087e-07,
"loss": 0.6871,
"step": 138000
},
{
"epoch": 3.93,
"learning_rate": 8.181585750404819e-07,
"loss": 0.6982,
"step": 138500
},
{
"epoch": 3.95,
"learning_rate": 6.40606800943155e-07,
"loss": 0.6891,
"step": 139000
},
{
"epoch": 3.96,
"learning_rate": 4.6305502684582823e-07,
"loss": 0.6792,
"step": 139500
},
{
"epoch": 3.98,
"learning_rate": 2.855032527485015e-07,
"loss": 0.7221,
"step": 140000
},
{
"epoch": 3.99,
"learning_rate": 1.0795147865117468e-07,
"loss": 0.7181,
"step": 140500
},
{
"epoch": 4.0,
"step": 140804,
"total_flos": 8.66406121808855e+16,
"train_loss": 1.5602563560180023,
"train_runtime": 41389.8114,
"train_samples_per_second": 3.402,
"train_steps_per_second": 3.402
}
],
"max_steps": 140804,
"num_train_epochs": 4,
"total_flos": 8.66406121808855e+16,
"trial_name": null,
"trial_params": null
}