het5_summarization / trainer_state.json
imvladikon's picture
init
b7d1a8f
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"global_step": 143557,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 4.982585314544049e-05,
"loss": 1.7947,
"step": 500
},
{
"epoch": 0.01,
"learning_rate": 4.965170629088098e-05,
"loss": 1.8219,
"step": 1000
},
{
"epoch": 0.01,
"learning_rate": 4.947755943632146e-05,
"loss": 1.767,
"step": 1500
},
{
"epoch": 0.01,
"learning_rate": 4.9303412581761946e-05,
"loss": 1.7913,
"step": 2000
},
{
"epoch": 0.02,
"learning_rate": 4.912926572720244e-05,
"loss": 1.7769,
"step": 2500
},
{
"epoch": 0.02,
"learning_rate": 4.8955118872642925e-05,
"loss": 1.7757,
"step": 3000
},
{
"epoch": 0.02,
"learning_rate": 4.878097201808341e-05,
"loss": 1.7965,
"step": 3500
},
{
"epoch": 0.03,
"learning_rate": 4.8606825163523904e-05,
"loss": 1.7883,
"step": 4000
},
{
"epoch": 0.03,
"learning_rate": 4.843267830896438e-05,
"loss": 1.7735,
"step": 4500
},
{
"epoch": 0.03,
"learning_rate": 4.825853145440487e-05,
"loss": 1.7864,
"step": 5000
},
{
"epoch": 0.04,
"learning_rate": 4.808438459984536e-05,
"loss": 1.7609,
"step": 5500
},
{
"epoch": 0.04,
"learning_rate": 4.791023774528585e-05,
"loss": 1.7905,
"step": 6000
},
{
"epoch": 0.05,
"learning_rate": 4.773609089072633e-05,
"loss": 1.7734,
"step": 6500
},
{
"epoch": 0.05,
"learning_rate": 4.756194403616682e-05,
"loss": 1.7594,
"step": 7000
},
{
"epoch": 0.05,
"learning_rate": 4.7387797181607305e-05,
"loss": 1.7439,
"step": 7500
},
{
"epoch": 0.06,
"learning_rate": 4.721365032704779e-05,
"loss": 1.7435,
"step": 8000
},
{
"epoch": 0.06,
"learning_rate": 4.7039503472488284e-05,
"loss": 1.7564,
"step": 8500
},
{
"epoch": 0.06,
"learning_rate": 4.686535661792877e-05,
"loss": 1.7824,
"step": 9000
},
{
"epoch": 0.07,
"learning_rate": 4.6691209763369256e-05,
"loss": 1.7561,
"step": 9500
},
{
"epoch": 0.07,
"learning_rate": 4.651706290880974e-05,
"loss": 1.7787,
"step": 10000
},
{
"epoch": 0.07,
"learning_rate": 4.634291605425023e-05,
"loss": 1.761,
"step": 10500
},
{
"epoch": 0.08,
"learning_rate": 4.616876919969072e-05,
"loss": 1.7324,
"step": 11000
},
{
"epoch": 0.08,
"learning_rate": 4.5994622345131206e-05,
"loss": 1.729,
"step": 11500
},
{
"epoch": 0.08,
"learning_rate": 4.582047549057169e-05,
"loss": 1.7545,
"step": 12000
},
{
"epoch": 0.09,
"learning_rate": 4.564632863601218e-05,
"loss": 1.7719,
"step": 12500
},
{
"epoch": 0.09,
"learning_rate": 4.5472181781452664e-05,
"loss": 1.7541,
"step": 13000
},
{
"epoch": 0.09,
"learning_rate": 4.529803492689315e-05,
"loss": 1.7212,
"step": 13500
},
{
"epoch": 0.1,
"learning_rate": 4.512388807233364e-05,
"loss": 1.7198,
"step": 14000
},
{
"epoch": 0.1,
"learning_rate": 4.494974121777413e-05,
"loss": 1.746,
"step": 14500
},
{
"epoch": 0.1,
"learning_rate": 4.4775594363214615e-05,
"loss": 1.7334,
"step": 15000
},
{
"epoch": 0.11,
"learning_rate": 4.46014475086551e-05,
"loss": 1.6989,
"step": 15500
},
{
"epoch": 0.11,
"learning_rate": 4.442730065409559e-05,
"loss": 1.7361,
"step": 16000
},
{
"epoch": 0.11,
"learning_rate": 4.425315379953607e-05,
"loss": 1.7046,
"step": 16500
},
{
"epoch": 0.12,
"learning_rate": 4.4079006944976565e-05,
"loss": 1.7274,
"step": 17000
},
{
"epoch": 0.12,
"learning_rate": 4.390486009041705e-05,
"loss": 1.7163,
"step": 17500
},
{
"epoch": 0.13,
"learning_rate": 4.373071323585754e-05,
"loss": 1.7321,
"step": 18000
},
{
"epoch": 0.13,
"learning_rate": 4.355656638129802e-05,
"loss": 1.726,
"step": 18500
},
{
"epoch": 0.13,
"learning_rate": 4.338241952673851e-05,
"loss": 1.7241,
"step": 19000
},
{
"epoch": 0.14,
"learning_rate": 4.3208272672178995e-05,
"loss": 1.7183,
"step": 19500
},
{
"epoch": 0.14,
"learning_rate": 4.303412581761949e-05,
"loss": 1.7036,
"step": 20000
},
{
"epoch": 0.14,
"learning_rate": 4.2859978963059974e-05,
"loss": 1.7265,
"step": 20500
},
{
"epoch": 0.15,
"learning_rate": 4.268583210850046e-05,
"loss": 1.7071,
"step": 21000
},
{
"epoch": 0.15,
"learning_rate": 4.2511685253940946e-05,
"loss": 1.7131,
"step": 21500
},
{
"epoch": 0.15,
"learning_rate": 4.233753839938143e-05,
"loss": 1.7175,
"step": 22000
},
{
"epoch": 0.16,
"learning_rate": 4.216339154482192e-05,
"loss": 1.7043,
"step": 22500
},
{
"epoch": 0.16,
"learning_rate": 4.198924469026241e-05,
"loss": 1.696,
"step": 23000
},
{
"epoch": 0.16,
"learning_rate": 4.1815097835702896e-05,
"loss": 1.7116,
"step": 23500
},
{
"epoch": 0.17,
"learning_rate": 4.1640950981143375e-05,
"loss": 1.7246,
"step": 24000
},
{
"epoch": 0.17,
"learning_rate": 4.146680412658387e-05,
"loss": 1.6985,
"step": 24500
},
{
"epoch": 0.17,
"learning_rate": 4.1292657272024354e-05,
"loss": 1.6928,
"step": 25000
},
{
"epoch": 0.18,
"learning_rate": 4.111851041746484e-05,
"loss": 1.6823,
"step": 25500
},
{
"epoch": 0.18,
"learning_rate": 4.094436356290533e-05,
"loss": 1.6996,
"step": 26000
},
{
"epoch": 0.18,
"learning_rate": 4.077021670834581e-05,
"loss": 1.7144,
"step": 26500
},
{
"epoch": 0.19,
"learning_rate": 4.05960698537863e-05,
"loss": 1.6874,
"step": 27000
},
{
"epoch": 0.19,
"learning_rate": 4.042192299922679e-05,
"loss": 1.6869,
"step": 27500
},
{
"epoch": 0.2,
"learning_rate": 4.0247776144667277e-05,
"loss": 1.7124,
"step": 28000
},
{
"epoch": 0.2,
"learning_rate": 4.007362929010776e-05,
"loss": 1.6995,
"step": 28500
},
{
"epoch": 0.2,
"learning_rate": 3.9899482435548255e-05,
"loss": 1.6845,
"step": 29000
},
{
"epoch": 0.21,
"learning_rate": 3.9725335580988734e-05,
"loss": 1.6923,
"step": 29500
},
{
"epoch": 0.21,
"learning_rate": 3.955118872642922e-05,
"loss": 1.6972,
"step": 30000
},
{
"epoch": 0.21,
"learning_rate": 3.937704187186971e-05,
"loss": 1.7031,
"step": 30500
},
{
"epoch": 0.22,
"learning_rate": 3.92028950173102e-05,
"loss": 1.6778,
"step": 31000
},
{
"epoch": 0.22,
"learning_rate": 3.902874816275069e-05,
"loss": 1.6946,
"step": 31500
},
{
"epoch": 0.22,
"learning_rate": 3.885460130819117e-05,
"loss": 1.71,
"step": 32000
},
{
"epoch": 0.23,
"learning_rate": 3.868045445363166e-05,
"loss": 1.68,
"step": 32500
},
{
"epoch": 0.23,
"learning_rate": 3.850630759907215e-05,
"loss": 1.7091,
"step": 33000
},
{
"epoch": 0.23,
"learning_rate": 3.8332160744512636e-05,
"loss": 1.6899,
"step": 33500
},
{
"epoch": 0.24,
"learning_rate": 3.815801388995312e-05,
"loss": 1.7072,
"step": 34000
},
{
"epoch": 0.24,
"learning_rate": 3.7983867035393614e-05,
"loss": 1.692,
"step": 34500
},
{
"epoch": 0.24,
"learning_rate": 3.7809720180834093e-05,
"loss": 1.6768,
"step": 35000
},
{
"epoch": 0.25,
"learning_rate": 3.763557332627458e-05,
"loss": 1.7081,
"step": 35500
},
{
"epoch": 0.25,
"learning_rate": 3.746142647171507e-05,
"loss": 1.6939,
"step": 36000
},
{
"epoch": 0.25,
"learning_rate": 3.728727961715556e-05,
"loss": 1.6794,
"step": 36500
},
{
"epoch": 0.26,
"learning_rate": 3.7113132762596044e-05,
"loss": 1.6616,
"step": 37000
},
{
"epoch": 0.26,
"learning_rate": 3.693898590803653e-05,
"loss": 1.6783,
"step": 37500
},
{
"epoch": 0.26,
"learning_rate": 3.6764839053477016e-05,
"loss": 1.6643,
"step": 38000
},
{
"epoch": 0.27,
"learning_rate": 3.65906921989175e-05,
"loss": 1.6748,
"step": 38500
},
{
"epoch": 0.27,
"learning_rate": 3.6416545344357995e-05,
"loss": 1.6706,
"step": 39000
},
{
"epoch": 0.28,
"learning_rate": 3.624239848979848e-05,
"loss": 1.695,
"step": 39500
},
{
"epoch": 0.28,
"learning_rate": 3.6068251635238967e-05,
"loss": 1.649,
"step": 40000
},
{
"epoch": 0.28,
"learning_rate": 3.589410478067945e-05,
"loss": 1.6785,
"step": 40500
},
{
"epoch": 0.29,
"learning_rate": 3.571995792611994e-05,
"loss": 1.6761,
"step": 41000
},
{
"epoch": 0.29,
"learning_rate": 3.5545811071560424e-05,
"loss": 1.6848,
"step": 41500
},
{
"epoch": 0.29,
"learning_rate": 3.537166421700092e-05,
"loss": 1.6633,
"step": 42000
},
{
"epoch": 0.3,
"learning_rate": 3.51975173624414e-05,
"loss": 1.6756,
"step": 42500
},
{
"epoch": 0.3,
"learning_rate": 3.502337050788189e-05,
"loss": 1.6778,
"step": 43000
},
{
"epoch": 0.3,
"learning_rate": 3.4849223653322375e-05,
"loss": 1.7106,
"step": 43500
},
{
"epoch": 0.31,
"learning_rate": 3.467507679876286e-05,
"loss": 1.706,
"step": 44000
},
{
"epoch": 0.31,
"learning_rate": 3.450092994420335e-05,
"loss": 1.6712,
"step": 44500
},
{
"epoch": 0.31,
"learning_rate": 3.432678308964384e-05,
"loss": 1.6721,
"step": 45000
},
{
"epoch": 0.32,
"learning_rate": 3.4152636235084326e-05,
"loss": 1.6818,
"step": 45500
},
{
"epoch": 0.32,
"learning_rate": 3.397848938052481e-05,
"loss": 1.6926,
"step": 46000
},
{
"epoch": 0.32,
"learning_rate": 3.38043425259653e-05,
"loss": 1.7093,
"step": 46500
},
{
"epoch": 0.33,
"learning_rate": 3.3630195671405783e-05,
"loss": 1.6835,
"step": 47000
},
{
"epoch": 0.33,
"learning_rate": 3.345604881684627e-05,
"loss": 1.6631,
"step": 47500
},
{
"epoch": 0.33,
"learning_rate": 3.328190196228676e-05,
"loss": 1.6754,
"step": 48000
},
{
"epoch": 0.34,
"learning_rate": 3.310775510772725e-05,
"loss": 1.6999,
"step": 48500
},
{
"epoch": 0.34,
"learning_rate": 3.293360825316773e-05,
"loss": 1.679,
"step": 49000
},
{
"epoch": 0.34,
"learning_rate": 3.275946139860822e-05,
"loss": 1.6596,
"step": 49500
},
{
"epoch": 0.35,
"learning_rate": 3.2585314544048706e-05,
"loss": 1.6628,
"step": 50000
},
{
"epoch": 0.35,
"learning_rate": 3.241116768948919e-05,
"loss": 1.7036,
"step": 50500
},
{
"epoch": 0.36,
"learning_rate": 3.2237020834929685e-05,
"loss": 1.6749,
"step": 51000
},
{
"epoch": 0.36,
"learning_rate": 3.206287398037017e-05,
"loss": 1.7121,
"step": 51500
},
{
"epoch": 0.36,
"learning_rate": 3.1888727125810656e-05,
"loss": 1.6733,
"step": 52000
},
{
"epoch": 0.37,
"learning_rate": 3.171458027125114e-05,
"loss": 1.6852,
"step": 52500
},
{
"epoch": 0.37,
"learning_rate": 3.154043341669163e-05,
"loss": 1.7159,
"step": 53000
},
{
"epoch": 0.37,
"learning_rate": 3.136628656213212e-05,
"loss": 1.6551,
"step": 53500
},
{
"epoch": 0.38,
"learning_rate": 3.119213970757261e-05,
"loss": 1.6456,
"step": 54000
},
{
"epoch": 0.38,
"learning_rate": 3.1017992853013086e-05,
"loss": 1.6491,
"step": 54500
},
{
"epoch": 0.38,
"learning_rate": 3.084384599845358e-05,
"loss": 1.6779,
"step": 55000
},
{
"epoch": 0.39,
"learning_rate": 3.0669699143894065e-05,
"loss": 1.6658,
"step": 55500
},
{
"epoch": 0.39,
"learning_rate": 3.049555228933455e-05,
"loss": 1.6845,
"step": 56000
},
{
"epoch": 0.39,
"learning_rate": 3.032140543477504e-05,
"loss": 1.6412,
"step": 56500
},
{
"epoch": 0.4,
"learning_rate": 3.0147258580215526e-05,
"loss": 1.6705,
"step": 57000
},
{
"epoch": 0.4,
"learning_rate": 2.9973111725656012e-05,
"loss": 1.67,
"step": 57500
},
{
"epoch": 0.4,
"learning_rate": 2.97989648710965e-05,
"loss": 1.6789,
"step": 58000
},
{
"epoch": 0.41,
"learning_rate": 2.9624818016536987e-05,
"loss": 1.6805,
"step": 58500
},
{
"epoch": 0.41,
"learning_rate": 2.9450671161977473e-05,
"loss": 1.6813,
"step": 59000
},
{
"epoch": 0.41,
"learning_rate": 2.9276524307417963e-05,
"loss": 1.6665,
"step": 59500
},
{
"epoch": 0.42,
"learning_rate": 2.910237745285845e-05,
"loss": 1.6306,
"step": 60000
},
{
"epoch": 0.42,
"learning_rate": 2.8928230598298935e-05,
"loss": 1.6507,
"step": 60500
},
{
"epoch": 0.42,
"learning_rate": 2.8754083743739424e-05,
"loss": 1.6701,
"step": 61000
},
{
"epoch": 0.43,
"learning_rate": 2.857993688917991e-05,
"loss": 1.6697,
"step": 61500
},
{
"epoch": 0.43,
"learning_rate": 2.8405790034620396e-05,
"loss": 1.6367,
"step": 62000
},
{
"epoch": 0.44,
"learning_rate": 2.8231643180060885e-05,
"loss": 1.7025,
"step": 62500
},
{
"epoch": 0.44,
"learning_rate": 2.805749632550137e-05,
"loss": 1.6884,
"step": 63000
},
{
"epoch": 0.44,
"learning_rate": 2.7883349470941854e-05,
"loss": 1.661,
"step": 63500
},
{
"epoch": 0.45,
"learning_rate": 2.7709202616382346e-05,
"loss": 1.6409,
"step": 64000
},
{
"epoch": 0.45,
"learning_rate": 2.7535055761822832e-05,
"loss": 1.6778,
"step": 64500
},
{
"epoch": 0.45,
"learning_rate": 2.7360908907263315e-05,
"loss": 1.6358,
"step": 65000
},
{
"epoch": 0.46,
"learning_rate": 2.7186762052703808e-05,
"loss": 1.6901,
"step": 65500
},
{
"epoch": 0.46,
"learning_rate": 2.7012615198144294e-05,
"loss": 1.6721,
"step": 66000
},
{
"epoch": 0.46,
"learning_rate": 2.6838468343584776e-05,
"loss": 1.6772,
"step": 66500
},
{
"epoch": 0.47,
"learning_rate": 2.666432148902527e-05,
"loss": 1.6687,
"step": 67000
},
{
"epoch": 0.47,
"learning_rate": 2.649017463446575e-05,
"loss": 1.6879,
"step": 67500
},
{
"epoch": 0.47,
"learning_rate": 2.6316027779906237e-05,
"loss": 1.6787,
"step": 68000
},
{
"epoch": 0.48,
"learning_rate": 2.614188092534673e-05,
"loss": 1.6956,
"step": 68500
},
{
"epoch": 0.48,
"learning_rate": 2.5967734070787213e-05,
"loss": 1.6805,
"step": 69000
},
{
"epoch": 0.48,
"learning_rate": 2.57935872162277e-05,
"loss": 1.681,
"step": 69500
},
{
"epoch": 0.49,
"learning_rate": 2.561944036166819e-05,
"loss": 1.6712,
"step": 70000
},
{
"epoch": 0.49,
"learning_rate": 2.5445293507108674e-05,
"loss": 1.697,
"step": 70500
},
{
"epoch": 0.49,
"learning_rate": 2.527114665254916e-05,
"loss": 1.6404,
"step": 71000
},
{
"epoch": 0.5,
"learning_rate": 2.5096999797989653e-05,
"loss": 1.6512,
"step": 71500
},
{
"epoch": 0.5,
"learning_rate": 2.4922852943430135e-05,
"loss": 1.689,
"step": 72000
},
{
"epoch": 0.51,
"learning_rate": 2.4748706088870624e-05,
"loss": 1.6408,
"step": 72500
},
{
"epoch": 0.51,
"learning_rate": 2.457455923431111e-05,
"loss": 1.6889,
"step": 73000
},
{
"epoch": 0.51,
"learning_rate": 2.4400412379751596e-05,
"loss": 1.6548,
"step": 73500
},
{
"epoch": 0.52,
"learning_rate": 2.4226265525192086e-05,
"loss": 1.6332,
"step": 74000
},
{
"epoch": 0.52,
"learning_rate": 2.405211867063257e-05,
"loss": 1.6602,
"step": 74500
},
{
"epoch": 0.52,
"learning_rate": 2.3877971816073058e-05,
"loss": 1.6895,
"step": 75000
},
{
"epoch": 0.53,
"learning_rate": 2.3703824961513547e-05,
"loss": 1.6763,
"step": 75500
},
{
"epoch": 0.53,
"learning_rate": 2.3529678106954033e-05,
"loss": 1.6317,
"step": 76000
},
{
"epoch": 0.53,
"learning_rate": 2.335553125239452e-05,
"loss": 1.6614,
"step": 76500
},
{
"epoch": 0.54,
"learning_rate": 2.3181384397835008e-05,
"loss": 1.6459,
"step": 77000
},
{
"epoch": 0.54,
"learning_rate": 2.3007237543275494e-05,
"loss": 1.6443,
"step": 77500
},
{
"epoch": 0.54,
"learning_rate": 2.2833090688715983e-05,
"loss": 1.6391,
"step": 78000
},
{
"epoch": 0.55,
"learning_rate": 2.265894383415647e-05,
"loss": 1.6689,
"step": 78500
},
{
"epoch": 0.55,
"learning_rate": 2.2484796979596955e-05,
"loss": 1.6879,
"step": 79000
},
{
"epoch": 0.55,
"learning_rate": 2.2310650125037445e-05,
"loss": 1.656,
"step": 79500
},
{
"epoch": 0.56,
"learning_rate": 2.213650327047793e-05,
"loss": 1.673,
"step": 80000
},
{
"epoch": 0.56,
"learning_rate": 2.1962356415918417e-05,
"loss": 1.849,
"step": 80500
},
{
"epoch": 0.56,
"learning_rate": 2.1788209561358906e-05,
"loss": 1.8576,
"step": 81000
},
{
"epoch": 0.57,
"learning_rate": 2.161406270679939e-05,
"loss": 1.8498,
"step": 81500
},
{
"epoch": 0.57,
"learning_rate": 2.1439915852239878e-05,
"loss": 1.8378,
"step": 82000
},
{
"epoch": 0.57,
"learning_rate": 2.1265768997680367e-05,
"loss": 1.8694,
"step": 82500
},
{
"epoch": 0.58,
"learning_rate": 2.109162214312085e-05,
"loss": 1.8587,
"step": 83000
},
{
"epoch": 0.58,
"learning_rate": 2.091747528856134e-05,
"loss": 1.8457,
"step": 83500
},
{
"epoch": 0.59,
"learning_rate": 2.074332843400183e-05,
"loss": 1.879,
"step": 84000
},
{
"epoch": 0.59,
"learning_rate": 2.056918157944231e-05,
"loss": 1.8394,
"step": 84500
},
{
"epoch": 0.59,
"learning_rate": 2.03950347248828e-05,
"loss": 1.8407,
"step": 85000
},
{
"epoch": 0.6,
"learning_rate": 2.0220887870323286e-05,
"loss": 1.8038,
"step": 85500
},
{
"epoch": 0.6,
"learning_rate": 2.0046741015763772e-05,
"loss": 1.8535,
"step": 86000
},
{
"epoch": 0.6,
"learning_rate": 1.987259416120426e-05,
"loss": 1.8292,
"step": 86500
},
{
"epoch": 0.61,
"learning_rate": 1.9698447306644748e-05,
"loss": 1.8859,
"step": 87000
},
{
"epoch": 0.61,
"learning_rate": 1.9524300452085233e-05,
"loss": 1.8197,
"step": 87500
},
{
"epoch": 0.61,
"learning_rate": 1.9350153597525723e-05,
"loss": 1.8465,
"step": 88000
},
{
"epoch": 0.62,
"learning_rate": 1.917600674296621e-05,
"loss": 1.8532,
"step": 88500
},
{
"epoch": 0.62,
"learning_rate": 1.9001859888406698e-05,
"loss": 1.863,
"step": 89000
},
{
"epoch": 0.62,
"learning_rate": 1.8827713033847184e-05,
"loss": 1.8414,
"step": 89500
},
{
"epoch": 0.63,
"learning_rate": 1.865356617928767e-05,
"loss": 1.8362,
"step": 90000
},
{
"epoch": 0.63,
"learning_rate": 1.847941932472816e-05,
"loss": 1.861,
"step": 90500
},
{
"epoch": 0.63,
"learning_rate": 1.8305272470168645e-05,
"loss": 1.8431,
"step": 91000
},
{
"epoch": 0.64,
"learning_rate": 1.813112561560913e-05,
"loss": 1.8449,
"step": 91500
},
{
"epoch": 0.64,
"learning_rate": 1.795697876104962e-05,
"loss": 1.8558,
"step": 92000
},
{
"epoch": 0.64,
"learning_rate": 1.7782831906490107e-05,
"loss": 1.8533,
"step": 92500
},
{
"epoch": 0.65,
"learning_rate": 1.7608685051930592e-05,
"loss": 1.8604,
"step": 93000
},
{
"epoch": 0.65,
"learning_rate": 1.7434538197371082e-05,
"loss": 1.8531,
"step": 93500
},
{
"epoch": 0.65,
"learning_rate": 1.7260391342811564e-05,
"loss": 1.8508,
"step": 94000
},
{
"epoch": 0.66,
"learning_rate": 1.7086244488252054e-05,
"loss": 1.8832,
"step": 94500
},
{
"epoch": 0.66,
"learning_rate": 1.6912097633692543e-05,
"loss": 1.8665,
"step": 95000
},
{
"epoch": 0.67,
"learning_rate": 1.6737950779133026e-05,
"loss": 1.8411,
"step": 95500
},
{
"epoch": 0.67,
"learning_rate": 1.6563803924573515e-05,
"loss": 1.842,
"step": 96000
},
{
"epoch": 0.67,
"learning_rate": 1.6389657070014004e-05,
"loss": 1.8422,
"step": 96500
},
{
"epoch": 0.68,
"learning_rate": 1.6215510215454487e-05,
"loss": 1.8227,
"step": 97000
},
{
"epoch": 0.68,
"learning_rate": 1.6041363360894976e-05,
"loss": 1.859,
"step": 97500
},
{
"epoch": 0.68,
"learning_rate": 1.5867216506335466e-05,
"loss": 1.8253,
"step": 98000
},
{
"epoch": 0.69,
"learning_rate": 1.5693069651775948e-05,
"loss": 1.8874,
"step": 98500
},
{
"epoch": 0.69,
"learning_rate": 1.5518922797216437e-05,
"loss": 1.8305,
"step": 99000
},
{
"epoch": 0.69,
"learning_rate": 1.5344775942656923e-05,
"loss": 1.8533,
"step": 99500
},
{
"epoch": 0.7,
"learning_rate": 1.5170629088097413e-05,
"loss": 1.8416,
"step": 100000
},
{
"epoch": 0.7,
"learning_rate": 1.4996482233537899e-05,
"loss": 1.8422,
"step": 100500
},
{
"epoch": 0.7,
"learning_rate": 1.4822335378978386e-05,
"loss": 1.8464,
"step": 101000
},
{
"epoch": 0.71,
"learning_rate": 1.4648188524418874e-05,
"loss": 1.818,
"step": 101500
},
{
"epoch": 0.71,
"learning_rate": 1.4474041669859358e-05,
"loss": 1.8483,
"step": 102000
},
{
"epoch": 0.71,
"learning_rate": 1.4299894815299848e-05,
"loss": 1.8505,
"step": 102500
},
{
"epoch": 0.72,
"learning_rate": 1.4125747960740335e-05,
"loss": 1.8335,
"step": 103000
},
{
"epoch": 0.72,
"learning_rate": 1.395160110618082e-05,
"loss": 1.853,
"step": 103500
},
{
"epoch": 0.72,
"learning_rate": 1.3777454251621307e-05,
"loss": 1.8434,
"step": 104000
},
{
"epoch": 0.73,
"learning_rate": 1.3603307397061796e-05,
"loss": 1.8637,
"step": 104500
},
{
"epoch": 0.73,
"learning_rate": 1.342916054250228e-05,
"loss": 1.8593,
"step": 105000
},
{
"epoch": 0.73,
"learning_rate": 1.3255013687942768e-05,
"loss": 1.8548,
"step": 105500
},
{
"epoch": 0.74,
"learning_rate": 1.3080866833383256e-05,
"loss": 1.8937,
"step": 106000
},
{
"epoch": 0.74,
"learning_rate": 1.2906719978823742e-05,
"loss": 1.8526,
"step": 106500
},
{
"epoch": 0.75,
"learning_rate": 1.273257312426423e-05,
"loss": 1.8718,
"step": 107000
},
{
"epoch": 0.75,
"learning_rate": 1.2558426269704717e-05,
"loss": 1.8332,
"step": 107500
},
{
"epoch": 0.75,
"learning_rate": 1.2384279415145205e-05,
"loss": 1.8443,
"step": 108000
},
{
"epoch": 0.76,
"learning_rate": 1.221013256058569e-05,
"loss": 1.8174,
"step": 108500
},
{
"epoch": 0.76,
"learning_rate": 1.2035985706026178e-05,
"loss": 1.8406,
"step": 109000
},
{
"epoch": 0.76,
"learning_rate": 1.1861838851466666e-05,
"loss": 1.8653,
"step": 109500
},
{
"epoch": 0.77,
"learning_rate": 1.1687691996907152e-05,
"loss": 1.8395,
"step": 110000
},
{
"epoch": 0.77,
"learning_rate": 1.151354514234764e-05,
"loss": 1.8608,
"step": 110500
},
{
"epoch": 0.77,
"learning_rate": 1.1339398287788126e-05,
"loss": 1.8242,
"step": 111000
},
{
"epoch": 0.78,
"learning_rate": 1.1165251433228613e-05,
"loss": 1.8436,
"step": 111500
},
{
"epoch": 0.78,
"learning_rate": 1.0991104578669101e-05,
"loss": 1.845,
"step": 112000
},
{
"epoch": 0.78,
"learning_rate": 1.0816957724109587e-05,
"loss": 1.8197,
"step": 112500
},
{
"epoch": 0.79,
"learning_rate": 1.0642810869550075e-05,
"loss": 1.837,
"step": 113000
},
{
"epoch": 0.79,
"learning_rate": 1.0468664014990562e-05,
"loss": 1.8245,
"step": 113500
},
{
"epoch": 0.79,
"learning_rate": 1.0294517160431048e-05,
"loss": 1.8535,
"step": 114000
},
{
"epoch": 0.8,
"learning_rate": 1.0120370305871536e-05,
"loss": 1.8367,
"step": 114500
},
{
"epoch": 0.8,
"learning_rate": 9.946223451312023e-06,
"loss": 1.8592,
"step": 115000
},
{
"epoch": 0.8,
"learning_rate": 9.77207659675251e-06,
"loss": 1.876,
"step": 115500
},
{
"epoch": 0.81,
"learning_rate": 9.597929742192997e-06,
"loss": 1.8435,
"step": 116000
},
{
"epoch": 0.81,
"learning_rate": 9.423782887633485e-06,
"loss": 1.8247,
"step": 116500
},
{
"epoch": 0.82,
"learning_rate": 9.249636033073972e-06,
"loss": 1.8369,
"step": 117000
},
{
"epoch": 0.82,
"learning_rate": 9.075489178514458e-06,
"loss": 1.8587,
"step": 117500
},
{
"epoch": 0.82,
"learning_rate": 8.901342323954944e-06,
"loss": 1.8935,
"step": 118000
},
{
"epoch": 0.83,
"learning_rate": 8.727195469395434e-06,
"loss": 1.8308,
"step": 118500
},
{
"epoch": 0.83,
"learning_rate": 8.55304861483592e-06,
"loss": 1.8474,
"step": 119000
},
{
"epoch": 0.83,
"learning_rate": 8.378901760276405e-06,
"loss": 1.8274,
"step": 119500
},
{
"epoch": 0.84,
"learning_rate": 8.204754905716893e-06,
"loss": 1.8682,
"step": 120000
},
{
"epoch": 0.84,
"learning_rate": 8.03060805115738e-06,
"loss": 1.8589,
"step": 120500
},
{
"epoch": 0.84,
"learning_rate": 7.856461196597867e-06,
"loss": 1.8726,
"step": 121000
},
{
"epoch": 0.85,
"learning_rate": 7.682314342038354e-06,
"loss": 1.8778,
"step": 121500
},
{
"epoch": 0.85,
"learning_rate": 7.508167487478841e-06,
"loss": 1.8435,
"step": 122000
},
{
"epoch": 0.85,
"learning_rate": 7.33402063291933e-06,
"loss": 1.8639,
"step": 122500
},
{
"epoch": 0.86,
"learning_rate": 7.159873778359816e-06,
"loss": 1.8692,
"step": 123000
},
{
"epoch": 0.86,
"learning_rate": 6.985726923800302e-06,
"loss": 1.854,
"step": 123500
},
{
"epoch": 0.86,
"learning_rate": 6.81158006924079e-06,
"loss": 1.8519,
"step": 124000
},
{
"epoch": 0.87,
"learning_rate": 6.637433214681277e-06,
"loss": 1.8521,
"step": 124500
},
{
"epoch": 0.87,
"learning_rate": 6.463286360121764e-06,
"loss": 1.8729,
"step": 125000
},
{
"epoch": 0.87,
"learning_rate": 6.289139505562251e-06,
"loss": 1.869,
"step": 125500
},
{
"epoch": 0.88,
"learning_rate": 6.114992651002738e-06,
"loss": 1.8853,
"step": 126000
},
{
"epoch": 0.88,
"learning_rate": 5.940845796443225e-06,
"loss": 1.8851,
"step": 126500
},
{
"epoch": 0.88,
"learning_rate": 5.7666989418837125e-06,
"loss": 1.8595,
"step": 127000
},
{
"epoch": 0.89,
"learning_rate": 5.5925520873241985e-06,
"loss": 1.8641,
"step": 127500
},
{
"epoch": 0.89,
"learning_rate": 5.418405232764686e-06,
"loss": 1.8521,
"step": 128000
},
{
"epoch": 0.9,
"learning_rate": 5.244258378205173e-06,
"loss": 1.8781,
"step": 128500
},
{
"epoch": 0.9,
"learning_rate": 5.0701115236456605e-06,
"loss": 1.8683,
"step": 129000
},
{
"epoch": 0.9,
"learning_rate": 4.895964669086147e-06,
"loss": 1.8362,
"step": 129500
},
{
"epoch": 0.91,
"learning_rate": 4.721817814526634e-06,
"loss": 1.8662,
"step": 130000
},
{
"epoch": 0.91,
"learning_rate": 4.547670959967122e-06,
"loss": 1.8399,
"step": 130500
},
{
"epoch": 0.91,
"learning_rate": 4.373524105407608e-06,
"loss": 1.8408,
"step": 131000
},
{
"epoch": 0.92,
"learning_rate": 4.199377250848095e-06,
"loss": 1.8687,
"step": 131500
},
{
"epoch": 0.92,
"learning_rate": 4.025230396288582e-06,
"loss": 1.8558,
"step": 132000
},
{
"epoch": 0.92,
"learning_rate": 3.85108354172907e-06,
"loss": 1.8609,
"step": 132500
},
{
"epoch": 0.93,
"learning_rate": 3.676936687169556e-06,
"loss": 1.8587,
"step": 133000
},
{
"epoch": 0.93,
"learning_rate": 3.5027898326100434e-06,
"loss": 1.8561,
"step": 133500
},
{
"epoch": 0.93,
"learning_rate": 3.3286429780505306e-06,
"loss": 1.8798,
"step": 134000
},
{
"epoch": 0.94,
"learning_rate": 3.154496123491018e-06,
"loss": 1.8781,
"step": 134500
},
{
"epoch": 0.94,
"learning_rate": 2.9803492689315047e-06,
"loss": 1.8639,
"step": 135000
},
{
"epoch": 0.94,
"learning_rate": 2.806202414371992e-06,
"loss": 1.848,
"step": 135500
},
{
"epoch": 0.95,
"learning_rate": 2.632055559812479e-06,
"loss": 1.8278,
"step": 136000
},
{
"epoch": 0.95,
"learning_rate": 2.457908705252966e-06,
"loss": 1.8527,
"step": 136500
},
{
"epoch": 0.95,
"learning_rate": 2.2837618506934527e-06,
"loss": 1.8564,
"step": 137000
},
{
"epoch": 0.96,
"learning_rate": 2.10961499613394e-06,
"loss": 1.8615,
"step": 137500
},
{
"epoch": 0.96,
"learning_rate": 1.9354681415744267e-06,
"loss": 1.8595,
"step": 138000
},
{
"epoch": 0.96,
"learning_rate": 1.7613212870149141e-06,
"loss": 1.8696,
"step": 138500
},
{
"epoch": 0.97,
"learning_rate": 1.587174432455401e-06,
"loss": 1.8487,
"step": 139000
},
{
"epoch": 0.97,
"learning_rate": 1.4130275778958882e-06,
"loss": 1.849,
"step": 139500
},
{
"epoch": 0.98,
"learning_rate": 1.2388807233363752e-06,
"loss": 1.9932,
"step": 140000
},
{
"epoch": 0.98,
"learning_rate": 1.0647338687768622e-06,
"loss": 2.0002,
"step": 140500
},
{
"epoch": 0.98,
"learning_rate": 8.905870142173492e-07,
"loss": 2.0011,
"step": 141000
},
{
"epoch": 0.99,
"learning_rate": 7.164401596578363e-07,
"loss": 2.0291,
"step": 141500
},
{
"epoch": 0.99,
"learning_rate": 5.422933050983233e-07,
"loss": 2.0218,
"step": 142000
},
{
"epoch": 0.99,
"learning_rate": 3.681464505388104e-07,
"loss": 2.0125,
"step": 142500
},
{
"epoch": 1.0,
"learning_rate": 1.9399959597929744e-07,
"loss": 2.0472,
"step": 143000
},
{
"epoch": 1.0,
"learning_rate": 1.985274141978448e-08,
"loss": 2.0334,
"step": 143500
},
{
"epoch": 1.0,
"eval_gen_len": 18.999925194494313,
"eval_loss": 1.7004536390304565,
"eval_rouge1": 14.8974,
"eval_rouge2": 3.7181,
"eval_rougeL": 14.7545,
"eval_rougeLsum": 14.8457,
"eval_runtime": 3615.3499,
"eval_samples_per_second": 3.698,
"eval_steps_per_second": 1.849,
"step": 143557
},
{
"epoch": 1.0,
"step": 143557,
"total_flos": 1.9503536341082112e+17,
"train_loss": 1.77077664958091,
"train_runtime": 65437.7493,
"train_samples_per_second": 4.388,
"train_steps_per_second": 2.194
}
],
"max_steps": 143557,
"num_train_epochs": 1,
"total_flos": 1.9503536341082112e+17,
"trial_name": null,
"trial_params": null
}