diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,3605 +1,6160 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 10.0, - "global_step": 28750, + "epoch": 3.0, + "global_step": 101625, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ + { + "epoch": 0.0, + "learning_rate": 3.0831802676462875e-07, + "loss": 4.3168, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 6.363159275780635e-07, + "loss": 4.155, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 9.643138283914982e-07, + "loss": 3.7804, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 1.2890317501967988e-06, + "loss": 3.8055, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 1.6170296510102337e-06, + "loss": 3.6927, + "step": 500 + }, + { + "epoch": 0.02, + "learning_rate": 1.9450275518236685e-06, + "loss": 3.5158, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 2.273025452637103e-06, + "loss": 3.6069, + "step": 700 + }, { "epoch": 0.02, - "learning_rate": 5.216786459540924e-07, - "loss": 4.1832, - "step": 50 + "learning_rate": 2.6010233534505377e-06, + "loss": 3.6429, + "step": 800 }, { "epoch": 0.03, - "learning_rate": 1.1013215859030839e-06, - "loss": 3.9055, - "step": 100 + "learning_rate": 2.929021254263973e-06, + "loss": 3.6292, + "step": 900 + }, + { + "epoch": 0.03, + "learning_rate": 3.257019155077408e-06, + "loss": 3.4969, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 3.585017055890842e-06, + "loss": 3.5082, + "step": 1100 + }, + { + "epoch": 0.04, + "learning_rate": 3.9130149567042775e-06, + "loss": 3.5132, + "step": 1200 + }, + { + "epoch": 0.04, + "learning_rate": 4.2410128575177125e-06, + "loss": 3.5413, + "step": 1300 + }, + { + "epoch": 0.04, + "learning_rate": 4.569010758331147e-06, + "loss": 3.4286, + "step": 1400 + }, + { + "epoch": 0.04, + "learning_rate": 4.897008659144582e-06, + "loss": 3.4228, + "step": 1500 + }, + { + "epoch": 0.05, + "learning_rate": 5.225006559958016e-06, + "loss": 3.4743, + "step": 1600 + }, + { + "epoch": 0.05, + "learning_rate": 5.553004460771451e-06, + "loss": 3.4929, + "step": 1700 }, { "epoch": 0.05, - "learning_rate": 1.6809645258520752e-06, - "loss": 3.653, - "step": 150 + "learning_rate": 5.881002361584886e-06, + "loss": 3.4324, + "step": 1800 + }, + { + "epoch": 0.06, + "learning_rate": 6.20900026239832e-06, + "loss": 3.4384, + "step": 1900 + }, + { + "epoch": 0.06, + "learning_rate": 6.536998163211756e-06, + "loss": 3.488, + "step": 2000 + }, + { + "epoch": 0.06, + "learning_rate": 6.86499606402519e-06, + "loss": 3.3966, + "step": 2100 + }, + { + "epoch": 0.06, + "learning_rate": 7.1929939648386246e-06, + "loss": 3.4662, + "step": 2200 }, { "epoch": 0.07, - "learning_rate": 2.260607465801067e-06, - "loss": 3.5949, - "step": 200 + "learning_rate": 7.5209918656520605e-06, + "loss": 3.5339, + "step": 2300 + }, + { + "epoch": 0.07, + "learning_rate": 7.848989766465495e-06, + "loss": 3.3541, + "step": 2400 + }, + { + "epoch": 0.07, + "learning_rate": 8.17698766727893e-06, + "loss": 3.3954, + "step": 2500 + }, + { + "epoch": 0.08, + "learning_rate": 8.504985568092365e-06, + "loss": 3.3898, + "step": 2600 + }, + { + "epoch": 0.08, + "learning_rate": 8.8329834689058e-06, + "loss": 3.431, + "step": 2700 + }, + { + "epoch": 0.08, + "learning_rate": 9.160981369719235e-06, + "loss": 3.4336, + "step": 2800 + }, + { + "epoch": 0.09, + "learning_rate": 9.48897927053267e-06, + "loss": 3.4402, + "step": 2900 + }, + { + "epoch": 0.09, + "learning_rate": 9.816977171346103e-06, + "loss": 3.3912, + "step": 3000 + }, + { + "epoch": 0.09, + "learning_rate": 1.0144975072159538e-05, + "loss": 3.3787, + "step": 3100 }, { "epoch": 0.09, - "learning_rate": 2.8286575469510783e-06, - "loss": 3.5274, - "step": 250 + "learning_rate": 1.0472972972972973e-05, + "loss": 3.3998, + "step": 3200 }, { "epoch": 0.1, - "learning_rate": 3.40830048690007e-06, - "loss": 3.4424, - "step": 300 + "learning_rate": 1.0800970873786408e-05, + "loss": 3.4397, + "step": 3300 + }, + { + "epoch": 0.1, + "learning_rate": 1.1125688795591708e-05, + "loss": 3.313, + "step": 3400 + }, + { + "epoch": 0.1, + "learning_rate": 1.1453686696405144e-05, + "loss": 3.3708, + "step": 3500 + }, + { + "epoch": 0.11, + "learning_rate": 1.1781684597218578e-05, + "loss": 3.3924, + "step": 3600 + }, + { + "epoch": 0.11, + "learning_rate": 1.2109682498032013e-05, + "loss": 3.3792, + "step": 3700 + }, + { + "epoch": 0.11, + "learning_rate": 1.2437680398845448e-05, + "loss": 3.3953, + "step": 3800 + }, + { + "epoch": 0.12, + "learning_rate": 1.2765678299658885e-05, + "loss": 3.4286, + "step": 3900 + }, + { + "epoch": 0.12, + "learning_rate": 1.3093676200472318e-05, + "loss": 3.3751, + "step": 4000 + }, + { + "epoch": 0.12, + "learning_rate": 1.3421674101285753e-05, + "loss": 3.4209, + "step": 4100 }, { "epoch": 0.12, - "learning_rate": 3.987943426849061e-06, - "loss": 3.5065, - "step": 350 + "learning_rate": 1.3749672002099186e-05, + "loss": 3.3963, + "step": 4200 + }, + { + "epoch": 0.13, + "learning_rate": 1.4077669902912621e-05, + "loss": 3.4004, + "step": 4300 + }, + { + "epoch": 0.13, + "learning_rate": 1.4405667803726055e-05, + "loss": 3.3787, + "step": 4400 + }, + { + "epoch": 0.13, + "learning_rate": 1.4733665704539493e-05, + "loss": 3.3521, + "step": 4500 }, { "epoch": 0.14, - "learning_rate": 4.567586366798052e-06, - "loss": 3.4948, - "step": 400 + "learning_rate": 1.5061663605352927e-05, + "loss": 3.3486, + "step": 4600 + }, + { + "epoch": 0.14, + "learning_rate": 1.538966150616636e-05, + "loss": 3.3643, + "step": 4700 + }, + { + "epoch": 0.14, + "learning_rate": 1.5717659406979797e-05, + "loss": 3.4276, + "step": 4800 + }, + { + "epoch": 0.14, + "learning_rate": 1.604565730779323e-05, + "loss": 3.3843, + "step": 4900 + }, + { + "epoch": 0.15, + "learning_rate": 1.6373655208606667e-05, + "loss": 3.3808, + "step": 5000 + }, + { + "epoch": 0.15, + "learning_rate": 1.67016531094201e-05, + "loss": 3.4145, + "step": 5100 + }, + { + "epoch": 0.15, + "learning_rate": 1.7029651010233537e-05, + "loss": 3.4281, + "step": 5200 + }, + { + "epoch": 0.16, + "learning_rate": 1.735764891104697e-05, + "loss": 3.3493, + "step": 5300 + }, + { + "epoch": 0.16, + "learning_rate": 1.7685646811860404e-05, + "loss": 3.3366, + "step": 5400 }, { "epoch": 0.16, - "learning_rate": 5.147229306747045e-06, - "loss": 3.4344, - "step": 450 + "learning_rate": 1.801364471267384e-05, + "loss": 3.3874, + "step": 5500 }, { "epoch": 0.17, - "learning_rate": 5.7268722466960354e-06, - "loss": 3.5227, - "step": 500 + "learning_rate": 1.8341642613487277e-05, + "loss": 3.3963, + "step": 5600 + }, + { + "epoch": 0.17, + "learning_rate": 1.866964051430071e-05, + "loss": 3.4015, + "step": 5700 + }, + { + "epoch": 0.17, + "learning_rate": 1.8997638415114144e-05, + "loss": 3.4283, + "step": 5800 + }, + { + "epoch": 0.17, + "learning_rate": 1.9325636315927577e-05, + "loss": 3.3884, + "step": 5900 + }, + { + "epoch": 0.18, + "learning_rate": 1.9653634216741014e-05, + "loss": 3.3508, + "step": 6000 + }, + { + "epoch": 0.18, + "learning_rate": 1.998163211755445e-05, + "loss": 3.3748, + "step": 6100 + }, + { + "epoch": 0.18, + "learning_rate": 2.0309630018367884e-05, + "loss": 3.3285, + "step": 6200 + }, + { + "epoch": 0.19, + "learning_rate": 2.0637627919181317e-05, + "loss": 3.3309, + "step": 6300 + }, + { + "epoch": 0.19, + "learning_rate": 2.0965625819994754e-05, + "loss": 3.3261, + "step": 6400 + }, + { + "epoch": 0.19, + "learning_rate": 2.1293623720808187e-05, + "loss": 3.3698, + "step": 6500 }, { "epoch": 0.19, - "learning_rate": 6.306515186645028e-06, - "loss": 3.3376, - "step": 550 + "learning_rate": 2.1621621621621624e-05, + "loss": 3.4237, + "step": 6600 + }, + { + "epoch": 0.2, + "learning_rate": 2.1949619522435057e-05, + "loss": 3.3434, + "step": 6700 + }, + { + "epoch": 0.2, + "learning_rate": 2.227761742324849e-05, + "loss": 3.3825, + "step": 6800 + }, + { + "epoch": 0.2, + "learning_rate": 2.2605615324061928e-05, + "loss": 3.4024, + "step": 6900 }, { "epoch": 0.21, - "learning_rate": 6.8861581265940184e-06, - "loss": 3.3889, - "step": 600 + "learning_rate": 2.293361322487536e-05, + "loss": 3.4105, + "step": 7000 + }, + { + "epoch": 0.21, + "learning_rate": 2.3261611125688798e-05, + "loss": 3.3623, + "step": 7100 + }, + { + "epoch": 0.21, + "learning_rate": 2.358960902650223e-05, + "loss": 3.3796, + "step": 7200 + }, + { + "epoch": 0.22, + "learning_rate": 2.3917606927315668e-05, + "loss": 3.4282, + "step": 7300 + }, + { + "epoch": 0.22, + "learning_rate": 2.42456048281291e-05, + "loss": 3.3423, + "step": 7400 + }, + { + "epoch": 0.22, + "learning_rate": 2.4573602728942534e-05, + "loss": 3.3964, + "step": 7500 + }, + { + "epoch": 0.22, + "learning_rate": 2.490160062975597e-05, + "loss": 3.3467, + "step": 7600 }, { "epoch": 0.23, - "learning_rate": 7.46580106654301e-06, - "loss": 3.262, - "step": 650 + "learning_rate": 2.5229598530569405e-05, + "loss": 3.3749, + "step": 7700 }, { - "epoch": 0.24, - "learning_rate": 8.045444006492002e-06, - "loss": 3.3727, - "step": 700 + "epoch": 0.23, + "learning_rate": 2.555759643138284e-05, + "loss": 3.3759, + "step": 7800 }, { - "epoch": 0.26, - "learning_rate": 8.625086946440993e-06, - "loss": 3.3277, - "step": 750 + "epoch": 0.23, + "learning_rate": 2.5885594332196278e-05, + "loss": 3.3565, + "step": 7900 }, { - "epoch": 0.28, - "learning_rate": 9.204729886389984e-06, - "loss": 3.3311, - "step": 800 + "epoch": 0.24, + "learning_rate": 2.6213592233009708e-05, + "loss": 3.3313, + "step": 8000 }, { - "epoch": 0.3, - "learning_rate": 9.784372826338974e-06, - "loss": 3.3332, - "step": 850 + "epoch": 0.24, + "learning_rate": 2.6541590133823148e-05, + "loss": 3.4114, + "step": 8100 + }, + { + "epoch": 0.24, + "learning_rate": 2.6869588034636578e-05, + "loss": 3.4161, + "step": 8200 + }, + { + "epoch": 0.25, + "learning_rate": 2.7197585935450015e-05, + "loss": 3.4302, + "step": 8300 + }, + { + "epoch": 0.25, + "learning_rate": 2.7525583836263448e-05, + "loss": 3.443, + "step": 8400 + }, + { + "epoch": 0.25, + "learning_rate": 2.7853581737076885e-05, + "loss": 3.358, + "step": 8500 + }, + { + "epoch": 0.25, + "learning_rate": 2.818157963789032e-05, + "loss": 3.381, + "step": 8600 + }, + { + "epoch": 0.26, + "learning_rate": 2.850957753870375e-05, + "loss": 3.3726, + "step": 8700 + }, + { + "epoch": 0.26, + "learning_rate": 2.883757543951719e-05, + "loss": 3.494, + "step": 8800 + }, + { + "epoch": 0.26, + "learning_rate": 2.9165573340330622e-05, + "loss": 3.4925, + "step": 8900 + }, + { + "epoch": 0.27, + "learning_rate": 2.949357124114406e-05, + "loss": 3.4443, + "step": 9000 + }, + { + "epoch": 0.27, + "learning_rate": 2.9821569141957495e-05, + "loss": 3.476, + "step": 9100 + }, + { + "epoch": 0.27, + "learning_rate": 3.0149567042770925e-05, + "loss": 3.4143, + "step": 9200 + }, + { + "epoch": 0.27, + "learning_rate": 3.0474284964576228e-05, + "loss": 3.3816, + "step": 9300 + }, + { + "epoch": 0.28, + "learning_rate": 3.080228286538966e-05, + "loss": 3.493, + "step": 9400 + }, + { + "epoch": 0.28, + "learning_rate": 3.1130280766203095e-05, + "loss": 3.4608, + "step": 9500 + }, + { + "epoch": 0.28, + "learning_rate": 3.1458278667016535e-05, + "loss": 3.3971, + "step": 9600 + }, + { + "epoch": 0.29, + "learning_rate": 3.178627656782997e-05, + "loss": 3.4157, + "step": 9700 + }, + { + "epoch": 0.29, + "learning_rate": 3.21142744686434e-05, + "loss": 3.3921, + "step": 9800 + }, + { + "epoch": 0.29, + "learning_rate": 3.244227236945684e-05, + "loss": 3.3785, + "step": 9900 + }, + { + "epoch": 0.3, + "learning_rate": 3.277027027027027e-05, + "loss": 3.3634, + "step": 10000 + }, + { + "epoch": 0.3, + "learning_rate": 3.309826817108371e-05, + "loss": 3.3881, + "step": 10100 + }, + { + "epoch": 0.3, + "learning_rate": 3.342626607189714e-05, + "loss": 3.362, + "step": 10200 + }, + { + "epoch": 0.3, + "learning_rate": 3.3754263972710575e-05, + "loss": 3.4395, + "step": 10300 + }, + { + "epoch": 0.31, + "learning_rate": 3.4082261873524015e-05, + "loss": 3.4151, + "step": 10400 + }, + { + "epoch": 0.31, + "learning_rate": 3.441025977433744e-05, + "loss": 3.3813, + "step": 10500 + }, + { + "epoch": 0.31, + "learning_rate": 3.473825767515088e-05, + "loss": 3.3624, + "step": 10600 + }, + { + "epoch": 0.32, + "learning_rate": 3.5066255575964315e-05, + "loss": 3.5004, + "step": 10700 + }, + { + "epoch": 0.32, + "learning_rate": 3.539425347677775e-05, + "loss": 3.373, + "step": 10800 + }, + { + "epoch": 0.32, + "learning_rate": 3.572225137759119e-05, + "loss": 3.4307, + "step": 10900 + }, + { + "epoch": 0.32, + "learning_rate": 3.605024927840462e-05, + "loss": 3.451, + "step": 11000 + }, + { + "epoch": 0.33, + "learning_rate": 3.6378247179218055e-05, + "loss": 3.3964, + "step": 11100 + }, + { + "epoch": 0.33, + "learning_rate": 3.670624508003149e-05, + "loss": 3.3537, + "step": 11200 + }, + { + "epoch": 0.33, + "learning_rate": 3.703424298084492e-05, + "loss": 3.469, + "step": 11300 + }, + { + "epoch": 0.34, + "learning_rate": 3.736224088165836e-05, + "loss": 3.4307, + "step": 11400 + }, + { + "epoch": 0.34, + "learning_rate": 3.7690238782471796e-05, + "loss": 3.4911, + "step": 11500 + }, + { + "epoch": 0.34, + "learning_rate": 3.801823668328523e-05, + "loss": 3.3976, + "step": 11600 + }, + { + "epoch": 0.35, + "learning_rate": 3.834623458409866e-05, + "loss": 3.4258, + "step": 11700 + }, + { + "epoch": 0.35, + "learning_rate": 3.8674232484912096e-05, + "loss": 3.4217, + "step": 11800 + }, + { + "epoch": 0.35, + "learning_rate": 3.9002230385725536e-05, + "loss": 3.4396, + "step": 11900 + }, + { + "epoch": 0.35, + "learning_rate": 3.933022828653897e-05, + "loss": 3.4841, + "step": 12000 + }, + { + "epoch": 0.36, + "learning_rate": 3.96582261873524e-05, + "loss": 3.4864, + "step": 12100 + }, + { + "epoch": 0.36, + "learning_rate": 3.9986224088165836e-05, + "loss": 3.4933, + "step": 12200 + }, + { + "epoch": 0.36, + "learning_rate": 4.0314221988979276e-05, + "loss": 3.4383, + "step": 12300 + }, + { + "epoch": 0.37, + "learning_rate": 4.064221988979271e-05, + "loss": 3.3793, + "step": 12400 + }, + { + "epoch": 0.37, + "learning_rate": 4.097021779060614e-05, + "loss": 3.4841, + "step": 12500 + }, + { + "epoch": 0.37, + "learning_rate": 4.1298215691419576e-05, + "loss": 3.482, + "step": 12600 + }, + { + "epoch": 0.37, + "learning_rate": 4.162621359223301e-05, + "loss": 3.4459, + "step": 12700 + }, + { + "epoch": 0.38, + "learning_rate": 4.195421149304645e-05, + "loss": 3.4549, + "step": 12800 + }, + { + "epoch": 0.38, + "learning_rate": 4.228220939385988e-05, + "loss": 3.5482, + "step": 12900 + }, + { + "epoch": 0.38, + "learning_rate": 4.2610207294673316e-05, + "loss": 3.4926, + "step": 13000 + }, + { + "epoch": 0.39, + "learning_rate": 4.293820519548675e-05, + "loss": 3.5195, + "step": 13100 + }, + { + "epoch": 0.39, + "learning_rate": 4.326620309630018e-05, + "loss": 3.5106, + "step": 13200 + }, + { + "epoch": 0.39, + "learning_rate": 4.359420099711362e-05, + "loss": 3.4536, + "step": 13300 + }, + { + "epoch": 0.4, + "learning_rate": 4.392219889792705e-05, + "loss": 3.4985, + "step": 13400 + }, + { + "epoch": 0.4, + "learning_rate": 4.425019679874049e-05, + "loss": 3.4308, + "step": 13500 + }, + { + "epoch": 0.4, + "learning_rate": 4.457491472054579e-05, + "loss": 3.4666, + "step": 13600 + }, + { + "epoch": 0.4, + "learning_rate": 4.4902912621359226e-05, + "loss": 3.5624, + "step": 13700 + }, + { + "epoch": 0.41, + "learning_rate": 4.523091052217266e-05, + "loss": 3.4941, + "step": 13800 + }, + { + "epoch": 0.41, + "learning_rate": 4.55589084229861e-05, + "loss": 3.4589, + "step": 13900 + }, + { + "epoch": 0.41, + "learning_rate": 4.5886906323799526e-05, + "loss": 3.5033, + "step": 14000 + }, + { + "epoch": 0.42, + "learning_rate": 4.6214904224612966e-05, + "loss": 3.4525, + "step": 14100 + }, + { + "epoch": 0.42, + "learning_rate": 4.65429021254264e-05, + "loss": 3.5153, + "step": 14200 + }, + { + "epoch": 0.42, + "learning_rate": 4.687090002623983e-05, + "loss": 3.5456, + "step": 14300 + }, + { + "epoch": 0.43, + "learning_rate": 4.719889792705327e-05, + "loss": 3.4878, + "step": 14400 + }, + { + "epoch": 0.43, + "learning_rate": 4.75268958278667e-05, + "loss": 3.4686, + "step": 14500 + }, + { + "epoch": 0.43, + "learning_rate": 4.785489372868014e-05, + "loss": 3.4891, + "step": 14600 + }, + { + "epoch": 0.43, + "learning_rate": 4.818289162949357e-05, + "loss": 3.4157, + "step": 14700 + }, + { + "epoch": 0.44, + "learning_rate": 4.8510889530307006e-05, + "loss": 3.444, + "step": 14800 + }, + { + "epoch": 0.44, + "learning_rate": 4.8838887431120446e-05, + "loss": 3.5387, + "step": 14900 + }, + { + "epoch": 0.44, + "learning_rate": 4.916688533193387e-05, + "loss": 3.5091, + "step": 15000 + }, + { + "epoch": 0.45, + "learning_rate": 4.949488323274731e-05, + "loss": 3.5356, + "step": 15100 + }, + { + "epoch": 0.45, + "learning_rate": 4.9822881133560746e-05, + "loss": 3.4978, + "step": 15200 + }, + { + "epoch": 0.45, + "learning_rate": 4.997337377432538e-05, + "loss": 3.4566, + "step": 15300 + }, + { + "epoch": 0.45, + "learning_rate": 4.991549067503271e-05, + "loss": 3.5653, + "step": 15400 + }, + { + "epoch": 0.46, + "learning_rate": 4.985760757574004e-05, + "loss": 3.5342, + "step": 15500 + }, + { + "epoch": 0.46, + "learning_rate": 4.979972447644737e-05, + "loss": 3.5189, + "step": 15600 + }, + { + "epoch": 0.46, + "learning_rate": 4.97418413771547e-05, + "loss": 3.5025, + "step": 15700 + }, + { + "epoch": 0.47, + "learning_rate": 4.968395827786203e-05, + "loss": 3.5216, + "step": 15800 + }, + { + "epoch": 0.47, + "learning_rate": 4.9626075178569366e-05, + "loss": 3.5428, + "step": 15900 + }, + { + "epoch": 0.47, + "learning_rate": 4.9568192079276696e-05, + "loss": 3.5112, + "step": 16000 + }, + { + "epoch": 0.48, + "learning_rate": 4.9510308979984026e-05, + "loss": 3.4538, + "step": 16100 + }, + { + "epoch": 0.48, + "learning_rate": 4.9452425880691356e-05, + "loss": 3.5476, + "step": 16200 + }, + { + "epoch": 0.48, + "learning_rate": 4.9394542781398686e-05, + "loss": 3.5039, + "step": 16300 + }, + { + "epoch": 0.48, + "learning_rate": 4.933665968210602e-05, + "loss": 3.4226, + "step": 16400 + }, + { + "epoch": 0.49, + "learning_rate": 4.927877658281335e-05, + "loss": 3.5409, + "step": 16500 + }, + { + "epoch": 0.49, + "learning_rate": 4.922089348352068e-05, + "loss": 3.4823, + "step": 16600 + }, + { + "epoch": 0.49, + "learning_rate": 4.916301038422801e-05, + "loss": 3.5517, + "step": 16700 + }, + { + "epoch": 0.5, + "learning_rate": 4.910512728493535e-05, + "loss": 3.5905, + "step": 16800 + }, + { + "epoch": 0.5, + "learning_rate": 4.9047244185642673e-05, + "loss": 3.5193, + "step": 16900 + }, + { + "epoch": 0.5, + "learning_rate": 4.898936108635001e-05, + "loss": 3.5474, + "step": 17000 + }, + { + "epoch": 0.5, + "learning_rate": 4.893147798705734e-05, + "loss": 3.5083, + "step": 17100 + }, + { + "epoch": 0.51, + "learning_rate": 4.887359488776467e-05, + "loss": 3.5096, + "step": 17200 + }, + { + "epoch": 0.51, + "learning_rate": 4.8815711788472e-05, + "loss": 3.4946, + "step": 17300 + }, + { + "epoch": 0.51, + "learning_rate": 4.875782868917934e-05, + "loss": 3.5561, + "step": 17400 + }, + { + "epoch": 0.52, + "learning_rate": 4.869994558988667e-05, + "loss": 3.5484, + "step": 17500 + }, + { + "epoch": 0.52, + "learning_rate": 4.864264132158692e-05, + "loss": 3.473, + "step": 17600 + }, + { + "epoch": 0.52, + "learning_rate": 4.858475822229426e-05, + "loss": 3.4777, + "step": 17700 + }, + { + "epoch": 0.53, + "learning_rate": 4.852687512300159e-05, + "loss": 3.5096, + "step": 17800 + }, + { + "epoch": 0.53, + "learning_rate": 4.846899202370892e-05, + "loss": 3.5513, + "step": 17900 + }, + { + "epoch": 0.53, + "learning_rate": 4.841110892441625e-05, + "loss": 3.54, + "step": 18000 + }, + { + "epoch": 0.53, + "learning_rate": 4.8353225825123584e-05, + "loss": 3.4855, + "step": 18100 + }, + { + "epoch": 0.54, + "learning_rate": 4.8295342725830915e-05, + "loss": 3.4912, + "step": 18200 + }, + { + "epoch": 0.54, + "learning_rate": 4.823803845753117e-05, + "loss": 3.4644, + "step": 18300 + }, + { + "epoch": 0.54, + "learning_rate": 4.8180155358238504e-05, + "loss": 3.5219, + "step": 18400 + }, + { + "epoch": 0.55, + "learning_rate": 4.8122272258945834e-05, + "loss": 3.4587, + "step": 18500 + }, + { + "epoch": 0.55, + "learning_rate": 4.8064389159653164e-05, + "loss": 3.5399, + "step": 18600 + }, + { + "epoch": 0.55, + "learning_rate": 4.8006506060360494e-05, + "loss": 3.5016, + "step": 18700 + }, + { + "epoch": 0.55, + "learning_rate": 4.794862296106783e-05, + "loss": 3.4807, + "step": 18800 + }, + { + "epoch": 0.56, + "learning_rate": 4.789073986177516e-05, + "loss": 3.5076, + "step": 18900 + }, + { + "epoch": 0.56, + "learning_rate": 4.783285676248249e-05, + "loss": 3.4874, + "step": 19000 + }, + { + "epoch": 0.56, + "learning_rate": 4.777555249418275e-05, + "loss": 3.4773, + "step": 19100 + }, + { + "epoch": 0.57, + "learning_rate": 4.771766939489008e-05, + "loss": 3.5177, + "step": 19200 + }, + { + "epoch": 0.57, + "learning_rate": 4.765978629559741e-05, + "loss": 3.4338, + "step": 19300 + }, + { + "epoch": 0.57, + "learning_rate": 4.760190319630475e-05, + "loss": 3.4832, + "step": 19400 + }, + { + "epoch": 0.58, + "learning_rate": 4.754402009701208e-05, + "loss": 3.3966, + "step": 19500 + }, + { + "epoch": 0.58, + "learning_rate": 4.748613699771941e-05, + "loss": 3.5248, + "step": 19600 + }, + { + "epoch": 0.58, + "learning_rate": 4.742825389842674e-05, + "loss": 3.5104, + "step": 19700 + }, + { + "epoch": 0.58, + "learning_rate": 4.7370370799134075e-05, + "loss": 3.4758, + "step": 19800 + }, + { + "epoch": 0.59, + "learning_rate": 4.73124876998414e-05, + "loss": 3.468, + "step": 19900 + }, + { + "epoch": 0.59, + "learning_rate": 4.7254604600548735e-05, + "loss": 3.5482, + "step": 20000 + }, + { + "epoch": 0.59, + "learning_rate": 4.7196721501256066e-05, + "loss": 3.4968, + "step": 20100 + }, + { + "epoch": 0.6, + "learning_rate": 4.7138838401963396e-05, + "loss": 3.4743, + "step": 20200 + }, + { + "epoch": 0.6, + "learning_rate": 4.7080955302670726e-05, + "loss": 3.4734, + "step": 20300 + }, + { + "epoch": 0.6, + "learning_rate": 4.702307220337806e-05, + "loss": 3.4668, + "step": 20400 + }, + { + "epoch": 0.61, + "learning_rate": 4.6965189104085386e-05, + "loss": 3.4479, + "step": 20500 + }, + { + "epoch": 0.61, + "learning_rate": 4.690730600479272e-05, + "loss": 3.4609, + "step": 20600 + }, + { + "epoch": 0.61, + "learning_rate": 4.684942290550006e-05, + "loss": 3.5061, + "step": 20700 + }, + { + "epoch": 0.61, + "learning_rate": 4.679153980620738e-05, + "loss": 3.474, + "step": 20800 + }, + { + "epoch": 0.62, + "learning_rate": 4.673365670691472e-05, + "loss": 3.46, + "step": 20900 + }, + { + "epoch": 0.62, + "learning_rate": 4.667577360762205e-05, + "loss": 3.4596, + "step": 21000 + }, + { + "epoch": 0.62, + "learning_rate": 4.661789050832938e-05, + "loss": 3.5117, + "step": 21100 + }, + { + "epoch": 0.63, + "learning_rate": 4.656000740903671e-05, + "loss": 3.5576, + "step": 21200 + }, + { + "epoch": 0.63, + "learning_rate": 4.650212430974405e-05, + "loss": 3.5696, + "step": 21300 + }, + { + "epoch": 0.63, + "learning_rate": 4.644424121045137e-05, + "loss": 3.4833, + "step": 21400 + }, + { + "epoch": 0.63, + "learning_rate": 4.638635811115871e-05, + "loss": 3.4939, + "step": 21500 + }, + { + "epoch": 0.64, + "learning_rate": 4.632905384285897e-05, + "loss": 3.4656, + "step": 21600 + }, + { + "epoch": 0.64, + "learning_rate": 4.62711707435663e-05, + "loss": 3.4646, + "step": 21700 + }, + { + "epoch": 0.64, + "learning_rate": 4.621328764427363e-05, + "loss": 3.5053, + "step": 21800 + }, + { + "epoch": 0.65, + "learning_rate": 4.615540454498096e-05, + "loss": 3.426, + "step": 21900 + }, + { + "epoch": 0.65, + "learning_rate": 4.6097521445688294e-05, + "loss": 3.5011, + "step": 22000 + }, + { + "epoch": 0.65, + "learning_rate": 4.603963834639562e-05, + "loss": 3.5055, + "step": 22100 + }, + { + "epoch": 0.66, + "learning_rate": 4.5981755247102954e-05, + "loss": 3.5099, + "step": 22200 + }, + { + "epoch": 0.66, + "learning_rate": 4.5923872147810284e-05, + "loss": 3.4436, + "step": 22300 + }, + { + "epoch": 0.66, + "learning_rate": 4.5865989048517614e-05, + "loss": 3.4445, + "step": 22400 + }, + { + "epoch": 0.66, + "learning_rate": 4.580810594922495e-05, + "loss": 3.4688, + "step": 22500 + }, + { + "epoch": 0.67, + "learning_rate": 4.575022284993228e-05, + "loss": 3.4393, + "step": 22600 + }, + { + "epoch": 0.67, + "learning_rate": 4.569233975063961e-05, + "loss": 3.455, + "step": 22700 + }, + { + "epoch": 0.67, + "learning_rate": 4.563445665134694e-05, + "loss": 3.47, + "step": 22800 + }, + { + "epoch": 0.68, + "learning_rate": 4.557657355205427e-05, + "loss": 3.4016, + "step": 22900 + }, + { + "epoch": 0.68, + "learning_rate": 4.55186904527616e-05, + "loss": 3.4426, + "step": 23000 + }, + { + "epoch": 0.68, + "learning_rate": 4.546080735346894e-05, + "loss": 3.4976, + "step": 23100 + }, + { + "epoch": 0.68, + "learning_rate": 4.540292425417627e-05, + "loss": 3.4368, + "step": 23200 + }, + { + "epoch": 0.69, + "learning_rate": 4.53450411548836e-05, + "loss": 3.439, + "step": 23300 + }, + { + "epoch": 0.69, + "learning_rate": 4.528715805559093e-05, + "loss": 3.5062, + "step": 23400 + }, + { + "epoch": 0.69, + "learning_rate": 4.522927495629826e-05, + "loss": 3.4495, + "step": 23500 + }, + { + "epoch": 0.7, + "learning_rate": 4.5171391857005596e-05, + "loss": 3.4425, + "step": 23600 + }, + { + "epoch": 0.7, + "learning_rate": 4.5113508757712926e-05, + "loss": 3.4741, + "step": 23700 + }, + { + "epoch": 0.7, + "learning_rate": 4.5055625658420256e-05, + "loss": 3.4746, + "step": 23800 + }, + { + "epoch": 0.71, + "learning_rate": 4.4997742559127586e-05, + "loss": 3.3827, + "step": 23900 + }, + { + "epoch": 0.71, + "learning_rate": 4.493985945983492e-05, + "loss": 3.3663, + "step": 24000 + }, + { + "epoch": 0.71, + "learning_rate": 4.4881976360542246e-05, + "loss": 3.4349, + "step": 24100 + }, + { + "epoch": 0.71, + "learning_rate": 4.482409326124958e-05, + "loss": 3.4367, + "step": 24200 + }, + { + "epoch": 0.72, + "learning_rate": 4.4766210161956913e-05, + "loss": 3.3795, + "step": 24300 + }, + { + "epoch": 0.72, + "learning_rate": 4.4708327062664244e-05, + "loss": 3.4992, + "step": 24400 + }, + { + "epoch": 0.72, + "learning_rate": 4.4650443963371574e-05, + "loss": 3.4647, + "step": 24500 + }, + { + "epoch": 0.73, + "learning_rate": 4.459256086407891e-05, + "loss": 3.441, + "step": 24600 + }, + { + "epoch": 0.73, + "learning_rate": 4.453467776478624e-05, + "loss": 3.4718, + "step": 24700 + }, + { + "epoch": 0.73, + "learning_rate": 4.447679466549357e-05, + "loss": 3.4318, + "step": 24800 + }, + { + "epoch": 0.74, + "learning_rate": 4.441891156620091e-05, + "loss": 3.4683, + "step": 24900 + }, + { + "epoch": 0.74, + "learning_rate": 4.436102846690823e-05, + "loss": 3.4339, + "step": 25000 + }, + { + "epoch": 0.74, + "learning_rate": 4.430314536761557e-05, + "loss": 3.4285, + "step": 25100 + }, + { + "epoch": 0.74, + "learning_rate": 4.42452622683229e-05, + "loss": 3.3831, + "step": 25200 + }, + { + "epoch": 0.75, + "learning_rate": 4.418737916903023e-05, + "loss": 3.4561, + "step": 25300 + }, + { + "epoch": 0.75, + "learning_rate": 4.412949606973756e-05, + "loss": 3.4521, + "step": 25400 + }, + { + "epoch": 0.75, + "learning_rate": 4.4071612970444895e-05, + "loss": 3.4461, + "step": 25500 + }, + { + "epoch": 0.76, + "learning_rate": 4.401372987115222e-05, + "loss": 3.52, + "step": 25600 + }, + { + "epoch": 0.76, + "learning_rate": 4.3955846771859555e-05, + "loss": 3.4293, + "step": 25700 + }, + { + "epoch": 0.76, + "learning_rate": 4.3897963672566885e-05, + "loss": 3.3902, + "step": 25800 + }, + { + "epoch": 0.76, + "learning_rate": 4.3840080573274215e-05, + "loss": 3.4207, + "step": 25900 + }, + { + "epoch": 0.77, + "learning_rate": 4.378219747398155e-05, + "loss": 3.5207, + "step": 26000 + }, + { + "epoch": 0.77, + "learning_rate": 4.372431437468888e-05, + "loss": 3.3999, + "step": 26100 + }, + { + "epoch": 0.77, + "learning_rate": 4.366643127539621e-05, + "loss": 3.4113, + "step": 26200 + }, + { + "epoch": 0.78, + "learning_rate": 4.360854817610354e-05, + "loss": 3.4914, + "step": 26300 + }, + { + "epoch": 0.78, + "learning_rate": 4.355066507681087e-05, + "loss": 3.4771, + "step": 26400 + }, + { + "epoch": 0.78, + "learning_rate": 4.349336080851113e-05, + "loss": 3.4838, + "step": 26500 + }, + { + "epoch": 0.79, + "learning_rate": 4.343547770921846e-05, + "loss": 3.4307, + "step": 26600 + }, + { + "epoch": 0.79, + "learning_rate": 4.33775946099258e-05, + "loss": 3.539, + "step": 26700 + }, + { + "epoch": 0.79, + "learning_rate": 4.331971151063313e-05, + "loss": 3.4617, + "step": 26800 + }, + { + "epoch": 0.79, + "learning_rate": 4.326182841134046e-05, + "loss": 3.4023, + "step": 26900 + }, + { + "epoch": 0.8, + "learning_rate": 4.320394531204779e-05, + "loss": 3.4677, + "step": 27000 + }, + { + "epoch": 0.8, + "learning_rate": 4.314606221275512e-05, + "loss": 3.4065, + "step": 27100 + }, + { + "epoch": 0.8, + "learning_rate": 4.308817911346245e-05, + "loss": 3.4339, + "step": 27200 + }, + { + "epoch": 0.81, + "learning_rate": 4.3030296014169787e-05, + "loss": 3.4403, + "step": 27300 + }, + { + "epoch": 0.81, + "learning_rate": 4.297241291487712e-05, + "loss": 3.4626, + "step": 27400 + }, + { + "epoch": 0.81, + "learning_rate": 4.291452981558445e-05, + "loss": 3.4698, + "step": 27500 + }, + { + "epoch": 0.81, + "learning_rate": 4.2856646716291784e-05, + "loss": 3.4339, + "step": 27600 + }, + { + "epoch": 0.82, + "learning_rate": 4.279876361699911e-05, + "loss": 3.3525, + "step": 27700 + }, + { + "epoch": 0.82, + "learning_rate": 4.2740880517706444e-05, + "loss": 3.4385, + "step": 27800 + }, + { + "epoch": 0.82, + "learning_rate": 4.2682997418413774e-05, + "loss": 3.3924, + "step": 27900 + }, + { + "epoch": 0.83, + "learning_rate": 4.2625114319121104e-05, + "loss": 3.4542, + "step": 28000 + }, + { + "epoch": 0.83, + "learning_rate": 4.2567810050821364e-05, + "loss": 3.439, + "step": 28100 + }, + { + "epoch": 0.83, + "learning_rate": 4.2509926951528694e-05, + "loss": 3.4499, + "step": 28200 + }, + { + "epoch": 0.84, + "learning_rate": 4.245204385223603e-05, + "loss": 3.4655, + "step": 28300 + }, + { + "epoch": 0.84, + "learning_rate": 4.2394160752943354e-05, + "loss": 3.4605, + "step": 28400 + }, + { + "epoch": 0.84, + "learning_rate": 4.233627765365069e-05, + "loss": 3.413, + "step": 28500 + }, + { + "epoch": 0.84, + "learning_rate": 4.227839455435802e-05, + "loss": 3.4802, + "step": 28600 + }, + { + "epoch": 0.85, + "learning_rate": 4.222051145506535e-05, + "loss": 3.4359, + "step": 28700 + }, + { + "epoch": 0.85, + "learning_rate": 4.216262835577268e-05, + "loss": 3.3859, + "step": 28800 + }, + { + "epoch": 0.85, + "learning_rate": 4.210474525648002e-05, + "loss": 3.3875, + "step": 28900 + }, + { + "epoch": 0.86, + "learning_rate": 4.204686215718734e-05, + "loss": 3.3715, + "step": 29000 + }, + { + "epoch": 0.86, + "learning_rate": 4.198897905789468e-05, + "loss": 3.3754, + "step": 29100 + }, + { + "epoch": 0.86, + "learning_rate": 4.193109595860201e-05, + "loss": 3.3918, + "step": 29200 + }, + { + "epoch": 0.86, + "learning_rate": 4.187321285930934e-05, + "loss": 3.4793, + "step": 29300 + }, + { + "epoch": 0.87, + "learning_rate": 4.1815329760016675e-05, + "loss": 3.4157, + "step": 29400 + }, + { + "epoch": 0.87, + "learning_rate": 4.1757446660724005e-05, + "loss": 3.4226, + "step": 29500 + }, + { + "epoch": 0.87, + "learning_rate": 4.1699563561431335e-05, + "loss": 3.4078, + "step": 29600 + }, + { + "epoch": 0.88, + "learning_rate": 4.1641680462138665e-05, + "loss": 3.4142, + "step": 29700 + }, + { + "epoch": 0.88, + "learning_rate": 4.1583797362846e-05, + "loss": 3.4222, + "step": 29800 + }, + { + "epoch": 0.88, + "learning_rate": 4.1525914263553326e-05, + "loss": 3.4325, + "step": 29900 + }, + { + "epoch": 0.89, + "learning_rate": 4.146803116426066e-05, + "loss": 3.4555, + "step": 30000 + }, + { + "epoch": 0.89, + "learning_rate": 4.141014806496799e-05, + "loss": 3.432, + "step": 30100 + }, + { + "epoch": 0.89, + "learning_rate": 4.135226496567532e-05, + "loss": 3.3894, + "step": 30200 + }, + { + "epoch": 0.89, + "learning_rate": 4.129438186638265e-05, + "loss": 3.4094, + "step": 30300 + }, + { + "epoch": 0.9, + "learning_rate": 4.123649876708999e-05, + "loss": 3.4366, + "step": 30400 + }, + { + "epoch": 0.9, + "learning_rate": 4.117861566779732e-05, + "loss": 3.4109, + "step": 30500 + }, + { + "epoch": 0.9, + "learning_rate": 4.112073256850465e-05, + "loss": 3.4778, + "step": 30600 + }, + { + "epoch": 0.91, + "learning_rate": 4.106284946921199e-05, + "loss": 3.4626, + "step": 30700 + }, + { + "epoch": 0.91, + "learning_rate": 4.100496636991931e-05, + "loss": 3.3753, + "step": 30800 + }, + { + "epoch": 0.91, + "learning_rate": 4.094708327062665e-05, + "loss": 3.4158, + "step": 30900 + }, + { + "epoch": 0.92, + "learning_rate": 4.088920017133398e-05, + "loss": 3.4925, + "step": 31000 + }, + { + "epoch": 0.92, + "learning_rate": 4.083131707204131e-05, + "loss": 3.4383, + "step": 31100 + }, + { + "epoch": 0.92, + "learning_rate": 4.077343397274864e-05, + "loss": 3.3489, + "step": 31200 + }, + { + "epoch": 0.92, + "learning_rate": 4.0715550873455974e-05, + "loss": 3.3988, + "step": 31300 + }, + { + "epoch": 0.93, + "learning_rate": 4.06576677741633e-05, + "loss": 3.4295, + "step": 31400 + }, + { + "epoch": 0.93, + "learning_rate": 4.0599784674870634e-05, + "loss": 3.4186, + "step": 31500 + }, + { + "epoch": 0.93, + "learning_rate": 4.0541901575577965e-05, + "loss": 3.4509, + "step": 31600 + }, + { + "epoch": 0.94, + "learning_rate": 4.0484018476285295e-05, + "loss": 3.3621, + "step": 31700 + }, + { + "epoch": 0.94, + "learning_rate": 4.042613537699263e-05, + "loss": 3.4542, + "step": 31800 + }, + { + "epoch": 0.94, + "learning_rate": 4.036825227769996e-05, + "loss": 3.3612, + "step": 31900 + }, + { + "epoch": 0.94, + "learning_rate": 4.031036917840729e-05, + "loss": 3.3997, + "step": 32000 + }, + { + "epoch": 0.95, + "learning_rate": 4.025248607911462e-05, + "loss": 3.4172, + "step": 32100 + }, + { + "epoch": 0.95, + "learning_rate": 4.019460297982195e-05, + "loss": 3.4839, + "step": 32200 + }, + { + "epoch": 0.95, + "learning_rate": 4.013671988052928e-05, + "loss": 3.3381, + "step": 32300 + }, + { + "epoch": 0.96, + "learning_rate": 4.007883678123662e-05, + "loss": 3.4331, + "step": 32400 + }, + { + "epoch": 0.96, + "learning_rate": 4.002095368194395e-05, + "loss": 3.3947, + "step": 32500 + }, + { + "epoch": 0.96, + "learning_rate": 3.996307058265128e-05, + "loss": 3.3557, + "step": 32600 + }, + { + "epoch": 0.97, + "learning_rate": 3.990518748335861e-05, + "loss": 3.3993, + "step": 32700 + }, + { + "epoch": 0.97, + "learning_rate": 3.984730438406594e-05, + "loss": 3.3265, + "step": 32800 + }, + { + "epoch": 0.97, + "learning_rate": 3.9789421284773276e-05, + "loss": 3.3464, + "step": 32900 + }, + { + "epoch": 0.97, + "learning_rate": 3.9731538185480606e-05, + "loss": 3.354, + "step": 33000 + }, + { + "epoch": 0.98, + "learning_rate": 3.9673655086187936e-05, + "loss": 3.4181, + "step": 33100 + }, + { + "epoch": 0.98, + "learning_rate": 3.9615771986895266e-05, + "loss": 3.3932, + "step": 33200 + }, + { + "epoch": 0.98, + "learning_rate": 3.9558467718595526e-05, + "loss": 3.3718, + "step": 33300 + }, + { + "epoch": 0.99, + "learning_rate": 3.950058461930286e-05, + "loss": 3.3526, + "step": 33400 + }, + { + "epoch": 0.99, + "learning_rate": 3.9442701520010186e-05, + "loss": 3.4861, + "step": 33500 + }, + { + "epoch": 0.99, + "learning_rate": 3.938481842071752e-05, + "loss": 3.4428, + "step": 33600 + }, + { + "epoch": 0.99, + "learning_rate": 3.932693532142485e-05, + "loss": 3.4784, + "step": 33700 + }, + { + "epoch": 1.0, + "learning_rate": 3.926905222213218e-05, + "loss": 3.3259, + "step": 33800 + }, + { + "epoch": 1.0, + "eval_gen_len": 19.3901, + "eval_loss": 3.25349760055542, + "eval_rouge1": 17.942, + "eval_rouge2": 4.5143, + "eval_rougeL": 14.2766, + "eval_rougeLsum": 15.582, + "eval_runtime": 746.4697, + "eval_samples_per_second": 10.083, + "eval_steps_per_second": 2.521, + "step": 33875 + }, + { + "epoch": 1.0, + "learning_rate": 3.921116912283951e-05, + "loss": 3.3341, + "step": 33900 + }, + { + "epoch": 1.0, + "learning_rate": 3.915328602354685e-05, + "loss": 3.2272, + "step": 34000 + }, + { + "epoch": 1.01, + "learning_rate": 3.9095402924254174e-05, + "loss": 3.1016, + "step": 34100 + }, + { + "epoch": 1.01, + "learning_rate": 3.903751982496151e-05, + "loss": 3.0869, + "step": 34200 + }, + { + "epoch": 1.01, + "learning_rate": 3.897963672566884e-05, + "loss": 3.0574, + "step": 34300 + }, + { + "epoch": 1.02, + "learning_rate": 3.892175362637617e-05, + "loss": 3.0656, + "step": 34400 + }, + { + "epoch": 1.02, + "learning_rate": 3.88638705270835e-05, + "loss": 3.0487, + "step": 34500 + }, + { + "epoch": 1.02, + "learning_rate": 3.880598742779084e-05, + "loss": 3.0965, + "step": 34600 + }, + { + "epoch": 1.02, + "learning_rate": 3.874810432849817e-05, + "loss": 3.1261, + "step": 34700 + }, + { + "epoch": 1.03, + "learning_rate": 3.86902212292055e-05, + "loss": 3.0195, + "step": 34800 + }, + { + "epoch": 1.03, + "learning_rate": 3.8632338129912835e-05, + "loss": 3.1102, + "step": 34900 + }, + { + "epoch": 1.03, + "learning_rate": 3.857445503062016e-05, + "loss": 3.0478, + "step": 35000 + }, + { + "epoch": 1.04, + "learning_rate": 3.8516571931327495e-05, + "loss": 3.0921, + "step": 35100 + }, + { + "epoch": 1.04, + "learning_rate": 3.8458688832034825e-05, + "loss": 3.0725, + "step": 35200 + }, + { + "epoch": 1.04, + "learning_rate": 3.8400805732742155e-05, + "loss": 3.0383, + "step": 35300 + }, + { + "epoch": 1.05, + "learning_rate": 3.8342922633449485e-05, + "loss": 3.0486, + "step": 35400 + }, + { + "epoch": 1.05, + "learning_rate": 3.828503953415682e-05, + "loss": 3.0608, + "step": 35500 + }, + { + "epoch": 1.05, + "learning_rate": 3.8227156434864145e-05, + "loss": 3.09, + "step": 35600 + }, + { + "epoch": 1.05, + "learning_rate": 3.8169852166564405e-05, + "loss": 3.0758, + "step": 35700 + }, + { + "epoch": 1.06, + "learning_rate": 3.811196906727174e-05, + "loss": 3.0034, + "step": 35800 + }, + { + "epoch": 1.06, + "learning_rate": 3.805408596797907e-05, + "loss": 3.1317, + "step": 35900 + }, + { + "epoch": 1.06, + "learning_rate": 3.79962028686864e-05, + "loss": 3.09, + "step": 36000 + }, + { + "epoch": 1.07, + "learning_rate": 3.793831976939373e-05, + "loss": 3.1168, + "step": 36100 + }, + { + "epoch": 1.07, + "learning_rate": 3.788043667010107e-05, + "loss": 3.115, + "step": 36200 + }, + { + "epoch": 1.07, + "learning_rate": 3.782255357080839e-05, + "loss": 3.0612, + "step": 36300 + }, + { + "epoch": 1.07, + "learning_rate": 3.776467047151573e-05, + "loss": 3.0553, + "step": 36400 + }, + { + "epoch": 1.08, + "learning_rate": 3.770678737222306e-05, + "loss": 3.0838, + "step": 36500 + }, + { + "epoch": 1.08, + "learning_rate": 3.764890427293039e-05, + "loss": 3.13, + "step": 36600 + }, + { + "epoch": 1.08, + "learning_rate": 3.7591021173637726e-05, + "loss": 3.0685, + "step": 36700 + }, + { + "epoch": 1.09, + "learning_rate": 3.7533138074345056e-05, + "loss": 3.0328, + "step": 36800 + }, + { + "epoch": 1.09, + "learning_rate": 3.7475254975052386e-05, + "loss": 3.0607, + "step": 36900 + }, + { + "epoch": 1.09, + "learning_rate": 3.7417371875759717e-05, + "loss": 3.0557, + "step": 37000 + }, + { + "epoch": 1.1, + "learning_rate": 3.735948877646705e-05, + "loss": 3.1025, + "step": 37100 + }, + { + "epoch": 1.1, + "learning_rate": 3.730160567717438e-05, + "loss": 3.0598, + "step": 37200 + }, + { + "epoch": 1.1, + "learning_rate": 3.7243722577881714e-05, + "loss": 3.1292, + "step": 37300 + }, + { + "epoch": 1.1, + "learning_rate": 3.7185839478589044e-05, + "loss": 3.0739, + "step": 37400 + }, + { + "epoch": 1.11, + "learning_rate": 3.7127956379296374e-05, + "loss": 3.1366, + "step": 37500 + }, + { + "epoch": 1.11, + "learning_rate": 3.707007328000371e-05, + "loss": 3.0558, + "step": 37600 + }, + { + "epoch": 1.11, + "learning_rate": 3.7012190180711034e-05, + "loss": 3.0975, + "step": 37700 + }, + { + "epoch": 1.12, + "learning_rate": 3.695430708141837e-05, + "loss": 3.0948, + "step": 37800 + }, + { + "epoch": 1.12, + "learning_rate": 3.68964239821257e-05, + "loss": 3.1102, + "step": 37900 + }, + { + "epoch": 1.12, + "learning_rate": 3.683854088283303e-05, + "loss": 3.1117, + "step": 38000 + }, + { + "epoch": 1.12, + "learning_rate": 3.678065778354036e-05, + "loss": 3.007, + "step": 38100 + }, + { + "epoch": 1.13, + "learning_rate": 3.67227746842477e-05, + "loss": 2.9965, + "step": 38200 + }, + { + "epoch": 1.13, + "learning_rate": 3.666489158495502e-05, + "loss": 3.0942, + "step": 38300 + }, + { + "epoch": 1.13, + "learning_rate": 3.660700848566236e-05, + "loss": 3.0544, + "step": 38400 + }, + { + "epoch": 1.14, + "learning_rate": 3.654912538636969e-05, + "loss": 3.071, + "step": 38500 + }, + { + "epoch": 1.14, + "learning_rate": 3.649124228707702e-05, + "loss": 3.0174, + "step": 38600 + }, + { + "epoch": 1.14, + "learning_rate": 3.6433359187784355e-05, + "loss": 3.0719, + "step": 38700 + }, + { + "epoch": 1.15, + "learning_rate": 3.6375476088491685e-05, + "loss": 3.0904, + "step": 38800 + }, + { + "epoch": 1.15, + "learning_rate": 3.6317592989199016e-05, + "loss": 3.0974, + "step": 38900 + }, + { + "epoch": 1.15, + "learning_rate": 3.6259709889906346e-05, + "loss": 3.0983, + "step": 39000 + }, + { + "epoch": 1.15, + "learning_rate": 3.620182679061368e-05, + "loss": 3.092, + "step": 39100 + }, + { + "epoch": 1.16, + "learning_rate": 3.6143943691321006e-05, + "loss": 3.0146, + "step": 39200 + }, + { + "epoch": 1.16, + "learning_rate": 3.608606059202834e-05, + "loss": 3.1821, + "step": 39300 + }, + { + "epoch": 1.16, + "learning_rate": 3.602817749273567e-05, + "loss": 3.1287, + "step": 39400 + }, + { + "epoch": 1.17, + "learning_rate": 3.5970294393443e-05, + "loss": 3.0698, + "step": 39500 + }, + { + "epoch": 1.17, + "learning_rate": 3.591241129415033e-05, + "loss": 3.0935, + "step": 39600 + }, + { + "epoch": 1.17, + "learning_rate": 3.585510702585059e-05, + "loss": 3.0959, + "step": 39700 + }, + { + "epoch": 1.17, + "learning_rate": 3.579722392655793e-05, + "loss": 3.0677, + "step": 39800 + }, + { + "epoch": 1.18, + "learning_rate": 3.573934082726525e-05, + "loss": 3.12, + "step": 39900 + }, + { + "epoch": 1.18, + "learning_rate": 3.568145772797259e-05, + "loss": 3.0438, + "step": 40000 + }, + { + "epoch": 1.18, + "learning_rate": 3.562357462867992e-05, + "loss": 3.1065, + "step": 40100 + }, + { + "epoch": 1.19, + "learning_rate": 3.556569152938725e-05, + "loss": 3.122, + "step": 40200 + }, + { + "epoch": 1.19, + "learning_rate": 3.550780843009458e-05, + "loss": 3.0528, + "step": 40300 + }, + { + "epoch": 1.19, + "learning_rate": 3.544992533080192e-05, + "loss": 3.0939, + "step": 40400 + }, + { + "epoch": 1.2, + "learning_rate": 3.539204223150925e-05, + "loss": 3.0964, + "step": 40500 + }, + { + "epoch": 1.2, + "learning_rate": 3.533415913221658e-05, + "loss": 3.0712, + "step": 40600 + }, + { + "epoch": 1.2, + "learning_rate": 3.5276276032923914e-05, + "loss": 3.0369, + "step": 40700 + }, + { + "epoch": 1.2, + "learning_rate": 3.521839293363124e-05, + "loss": 3.08, + "step": 40800 + }, + { + "epoch": 1.21, + "learning_rate": 3.5160509834338574e-05, + "loss": 3.1407, + "step": 40900 + }, + { + "epoch": 1.21, + "learning_rate": 3.5102626735045904e-05, + "loss": 3.0949, + "step": 41000 + }, + { + "epoch": 1.21, + "learning_rate": 3.5044743635753234e-05, + "loss": 3.157, + "step": 41100 + }, + { + "epoch": 1.22, + "learning_rate": 3.4986860536460564e-05, + "loss": 3.0251, + "step": 41200 + }, + { + "epoch": 1.22, + "learning_rate": 3.49289774371679e-05, + "loss": 3.1212, + "step": 41300 + }, + { + "epoch": 1.22, + "learning_rate": 3.4871094337875225e-05, + "loss": 3.1201, + "step": 41400 + }, + { + "epoch": 1.23, + "learning_rate": 3.481321123858256e-05, + "loss": 3.1373, + "step": 41500 + }, + { + "epoch": 1.23, + "learning_rate": 3.475532813928989e-05, + "loss": 3.1454, + "step": 41600 + }, + { + "epoch": 1.23, + "learning_rate": 3.469802387099015e-05, + "loss": 3.1846, + "step": 41700 + }, + { + "epoch": 1.23, + "learning_rate": 3.464014077169748e-05, + "loss": 3.075, + "step": 41800 + }, + { + "epoch": 1.24, + "learning_rate": 3.458225767240481e-05, + "loss": 3.1067, + "step": 41900 + }, + { + "epoch": 1.24, + "learning_rate": 3.452437457311215e-05, + "loss": 3.0698, + "step": 42000 + }, + { + "epoch": 1.24, + "learning_rate": 3.446649147381947e-05, + "loss": 3.197, + "step": 42100 + }, + { + "epoch": 1.25, + "learning_rate": 3.440860837452681e-05, + "loss": 3.0678, + "step": 42200 + }, + { + "epoch": 1.25, + "learning_rate": 3.435072527523414e-05, + "loss": 3.111, + "step": 42300 + }, + { + "epoch": 1.25, + "learning_rate": 3.429284217594147e-05, + "loss": 3.1098, + "step": 42400 + }, + { + "epoch": 1.25, + "learning_rate": 3.4234959076648805e-05, + "loss": 3.0416, + "step": 42500 + }, + { + "epoch": 1.26, + "learning_rate": 3.4177075977356136e-05, + "loss": 3.0468, + "step": 42600 + }, + { + "epoch": 1.26, + "learning_rate": 3.4119192878063466e-05, + "loss": 3.1422, + "step": 42700 + }, + { + "epoch": 1.26, + "learning_rate": 3.4061309778770796e-05, + "loss": 3.1238, + "step": 42800 + }, + { + "epoch": 1.27, + "learning_rate": 3.4003426679478126e-05, + "loss": 3.0876, + "step": 42900 + }, + { + "epoch": 1.27, + "learning_rate": 3.3945543580185456e-05, + "loss": 3.0583, + "step": 43000 + }, + { + "epoch": 1.27, + "learning_rate": 3.388766048089279e-05, + "loss": 3.1087, + "step": 43100 + }, + { + "epoch": 1.28, + "learning_rate": 3.382977738160012e-05, + "loss": 3.0599, + "step": 43200 + }, + { + "epoch": 1.28, + "learning_rate": 3.377189428230745e-05, + "loss": 3.0935, + "step": 43300 + }, + { + "epoch": 1.28, + "learning_rate": 3.371401118301479e-05, + "loss": 3.1727, + "step": 43400 + }, + { + "epoch": 1.28, + "learning_rate": 3.365612808372211e-05, + "loss": 3.0115, + "step": 43500 + }, + { + "epoch": 1.29, + "learning_rate": 3.359824498442945e-05, + "loss": 3.1088, + "step": 43600 + }, + { + "epoch": 1.29, + "learning_rate": 3.354036188513678e-05, + "loss": 3.0485, + "step": 43700 + }, + { + "epoch": 1.29, + "learning_rate": 3.348247878584411e-05, + "loss": 3.102, + "step": 43800 + }, + { + "epoch": 1.3, + "learning_rate": 3.342459568655144e-05, + "loss": 3.0692, + "step": 43900 + }, + { + "epoch": 1.3, + "learning_rate": 3.336671258725878e-05, + "loss": 3.1147, + "step": 44000 + }, + { + "epoch": 1.3, + "learning_rate": 3.33088294879661e-05, + "loss": 3.1878, + "step": 44100 + }, + { + "epoch": 1.3, + "learning_rate": 3.325152521966636e-05, + "loss": 3.0811, + "step": 44200 + }, + { + "epoch": 1.31, + "learning_rate": 3.31936421203737e-05, + "loss": 3.0288, + "step": 44300 + }, + { + "epoch": 1.31, + "learning_rate": 3.313575902108103e-05, + "loss": 3.0671, + "step": 44400 + }, + { + "epoch": 1.31, + "learning_rate": 3.307787592178836e-05, + "loss": 3.0503, + "step": 44500 + }, + { + "epoch": 1.32, + "learning_rate": 3.301999282249569e-05, + "loss": 3.0966, + "step": 44600 + }, + { + "epoch": 1.32, + "learning_rate": 3.2962109723203024e-05, + "loss": 3.0395, + "step": 44700 + }, + { + "epoch": 1.32, + "learning_rate": 3.290422662391035e-05, + "loss": 3.0602, + "step": 44800 + }, + { + "epoch": 1.33, + "learning_rate": 3.2846343524617684e-05, + "loss": 3.075, + "step": 44900 + }, + { + "epoch": 1.33, + "learning_rate": 3.2788460425325014e-05, + "loss": 3.1158, + "step": 45000 + }, + { + "epoch": 1.33, + "learning_rate": 3.2730577326032345e-05, + "loss": 3.0903, + "step": 45100 + }, + { + "epoch": 1.33, + "learning_rate": 3.267269422673968e-05, + "loss": 3.0556, + "step": 45200 + }, + { + "epoch": 1.34, + "learning_rate": 3.261481112744701e-05, + "loss": 3.1016, + "step": 45300 }, { - "epoch": 0.31, - "learning_rate": 1.0364015766287967e-05, - "loss": 3.3371, - "step": 900 + "epoch": 1.34, + "learning_rate": 3.255692802815434e-05, + "loss": 3.1105, + "step": 45400 }, { - "epoch": 0.33, - "learning_rate": 1.0943658706236959e-05, - "loss": 3.3538, - "step": 950 + "epoch": 1.34, + "learning_rate": 3.249904492886167e-05, + "loss": 3.1088, + "step": 45500 }, { - "epoch": 0.35, - "learning_rate": 1.152330164618595e-05, - "loss": 3.2633, - "step": 1000 + "epoch": 1.35, + "learning_rate": 3.244116182956901e-05, + "loss": 3.1597, + "step": 45600 }, { - "epoch": 0.37, - "learning_rate": 1.2102944586134942e-05, - "loss": 3.3787, - "step": 1050 + "epoch": 1.35, + "learning_rate": 3.238327873027633e-05, + "loss": 3.0724, + "step": 45700 }, { - "epoch": 0.38, - "learning_rate": 1.2682587526083933e-05, - "loss": 3.2775, - "step": 1100 + "epoch": 1.35, + "learning_rate": 3.232539563098367e-05, + "loss": 3.0631, + "step": 45800 }, { - "epoch": 0.4, - "learning_rate": 1.3262230466032923e-05, - "loss": 3.2566, - "step": 1150 + "epoch": 1.35, + "learning_rate": 3.2267512531691e-05, + "loss": 3.0182, + "step": 45900 }, { - "epoch": 0.42, - "learning_rate": 1.3841873405981917e-05, - "loss": 3.3364, - "step": 1200 + "epoch": 1.36, + "learning_rate": 3.220962943239833e-05, + "loss": 3.0879, + "step": 46000 }, { - "epoch": 0.43, - "learning_rate": 1.4421516345930908e-05, - "loss": 3.2883, - "step": 1250 + "epoch": 1.36, + "learning_rate": 3.215174633310566e-05, + "loss": 3.1382, + "step": 46100 }, { - "epoch": 0.45, - "learning_rate": 1.5001159285879899e-05, - "loss": 3.3018, - "step": 1300 + "epoch": 1.36, + "learning_rate": 3.2093863233812996e-05, + "loss": 3.1101, + "step": 46200 }, { - "epoch": 0.47, - "learning_rate": 1.5580802225828888e-05, - "loss": 3.3564, - "step": 1350 + "epoch": 1.37, + "learning_rate": 3.2035980134520326e-05, + "loss": 3.0471, + "step": 46300 }, { - "epoch": 0.49, - "learning_rate": 1.6160445165777882e-05, - "loss": 3.2463, - "step": 1400 + "epoch": 1.37, + "learning_rate": 3.1978097035227656e-05, + "loss": 3.0564, + "step": 46400 }, { - "epoch": 0.5, - "learning_rate": 1.6740088105726872e-05, - "loss": 3.3455, - "step": 1450 + "epoch": 1.37, + "learning_rate": 3.1920213935934986e-05, + "loss": 3.1499, + "step": 46500 }, { - "epoch": 0.52, - "learning_rate": 1.7319731045675863e-05, - "loss": 3.3171, - "step": 1500 + "epoch": 1.38, + "learning_rate": 3.1862330836642316e-05, + "loss": 3.092, + "step": 46600 }, { - "epoch": 0.54, - "learning_rate": 1.7899373985624857e-05, - "loss": 3.2919, - "step": 1550 + "epoch": 1.38, + "learning_rate": 3.180444773734965e-05, + "loss": 3.0732, + "step": 46700 }, { - "epoch": 0.56, - "learning_rate": 1.8479016925573848e-05, - "loss": 3.292, - "step": 1600 + "epoch": 1.38, + "learning_rate": 3.1746564638056983e-05, + "loss": 3.0483, + "step": 46800 }, { - "epoch": 0.57, - "learning_rate": 1.905865986552284e-05, - "loss": 3.3646, - "step": 1650 + "epoch": 1.38, + "learning_rate": 3.1688681538764314e-05, + "loss": 3.0419, + "step": 46900 }, { - "epoch": 0.59, - "learning_rate": 1.9638302805471832e-05, - "loss": 3.2056, - "step": 1700 + "epoch": 1.39, + "learning_rate": 3.1630798439471644e-05, + "loss": 3.0499, + "step": 47000 }, { - "epoch": 0.61, - "learning_rate": 2.0217945745420823e-05, - "loss": 3.2624, - "step": 1750 + "epoch": 1.39, + "learning_rate": 3.1572915340178974e-05, + "loss": 3.1122, + "step": 47100 }, { - "epoch": 0.63, - "learning_rate": 2.079758868536981e-05, - "loss": 3.2349, - "step": 1800 + "epoch": 1.39, + "learning_rate": 3.1515032240886304e-05, + "loss": 3.1401, + "step": 47200 }, { - "epoch": 0.64, - "learning_rate": 2.1377231625318804e-05, - "loss": 3.2517, - "step": 1850 + "epoch": 1.4, + "learning_rate": 3.145714914159364e-05, + "loss": 3.072, + "step": 47300 }, { - "epoch": 0.66, - "learning_rate": 2.1956874565267795e-05, - "loss": 3.2804, - "step": 1900 + "epoch": 1.4, + "learning_rate": 3.139926604230097e-05, + "loss": 3.0722, + "step": 47400 }, { - "epoch": 0.68, - "learning_rate": 2.2536517505216786e-05, - "loss": 3.165, - "step": 1950 + "epoch": 1.4, + "learning_rate": 3.13413829430083e-05, + "loss": 3.0141, + "step": 47500 }, { - "epoch": 0.7, - "learning_rate": 2.311616044516578e-05, - "loss": 3.2733, - "step": 2000 + "epoch": 1.41, + "learning_rate": 3.128349984371564e-05, + "loss": 3.0253, + "step": 47600 }, { - "epoch": 0.71, - "learning_rate": 2.369580338511477e-05, - "loss": 3.2974, - "step": 2050 + "epoch": 1.41, + "learning_rate": 3.122561674442296e-05, + "loss": 3.037, + "step": 47700 }, { - "epoch": 0.73, - "learning_rate": 2.427544632506376e-05, - "loss": 3.4127, - "step": 2100 + "epoch": 1.41, + "learning_rate": 3.11677336451303e-05, + "loss": 3.0667, + "step": 47800 }, { - "epoch": 0.75, - "learning_rate": 2.4855089265012755e-05, - "loss": 3.21, - "step": 2150 + "epoch": 1.41, + "learning_rate": 3.110985054583763e-05, + "loss": 3.0826, + "step": 47900 }, { - "epoch": 0.77, - "learning_rate": 2.5434732204961743e-05, - "loss": 3.2733, - "step": 2200 + "epoch": 1.42, + "learning_rate": 3.105196744654496e-05, + "loss": 3.0285, + "step": 48000 }, { - "epoch": 0.78, - "learning_rate": 2.6014375144910737e-05, - "loss": 3.2862, - "step": 2250 + "epoch": 1.42, + "learning_rate": 3.099408434725229e-05, + "loss": 3.1176, + "step": 48100 }, { - "epoch": 0.8, - "learning_rate": 2.6594018084859727e-05, - "loss": 3.2508, - "step": 2300 + "epoch": 1.42, + "learning_rate": 3.0936201247959625e-05, + "loss": 3.0716, + "step": 48200 }, { - "epoch": 0.82, - "learning_rate": 2.7173661024808718e-05, - "loss": 3.2311, - "step": 2350 + "epoch": 1.43, + "learning_rate": 3.0878896979659885e-05, + "loss": 2.9875, + "step": 48300 }, { - "epoch": 0.83, - "learning_rate": 2.775330396475771e-05, - "loss": 3.2441, - "step": 2400 + "epoch": 1.43, + "learning_rate": 3.082101388036721e-05, + "loss": 3.0571, + "step": 48400 }, { - "epoch": 0.85, - "learning_rate": 2.8332946904706703e-05, - "loss": 3.32, - "step": 2450 + "epoch": 1.43, + "learning_rate": 3.0763130781074545e-05, + "loss": 3.0559, + "step": 48500 }, { - "epoch": 0.87, - "learning_rate": 2.891258984465569e-05, - "loss": 3.2646, - "step": 2500 + "epoch": 1.43, + "learning_rate": 3.0705247681781875e-05, + "loss": 3.072, + "step": 48600 }, { - "epoch": 0.89, - "learning_rate": 2.9492232784604684e-05, - "loss": 3.2483, - "step": 2550 + "epoch": 1.44, + "learning_rate": 3.0647364582489205e-05, + "loss": 3.0742, + "step": 48700 }, { - "epoch": 0.9, - "learning_rate": 3.0071875724553678e-05, - "loss": 3.2918, - "step": 2600 + "epoch": 1.44, + "learning_rate": 3.0589481483196535e-05, + "loss": 3.0804, + "step": 48800 }, { - "epoch": 0.92, - "learning_rate": 3.0651518664502665e-05, - "loss": 3.3011, - "step": 2650 + "epoch": 1.44, + "learning_rate": 3.053159838390387e-05, + "loss": 3.1331, + "step": 48900 }, { - "epoch": 0.94, - "learning_rate": 3.1231161604451656e-05, - "loss": 3.2694, - "step": 2700 + "epoch": 1.45, + "learning_rate": 3.04737152846112e-05, + "loss": 3.0802, + "step": 49000 }, { - "epoch": 0.96, - "learning_rate": 3.181080454440065e-05, - "loss": 3.2271, - "step": 2750 + "epoch": 1.45, + "learning_rate": 3.0415832185318532e-05, + "loss": 3.0288, + "step": 49100 }, { - "epoch": 0.97, - "learning_rate": 3.239044748434964e-05, - "loss": 3.2236, - "step": 2800 + "epoch": 1.45, + "learning_rate": 3.0357949086025862e-05, + "loss": 3.0775, + "step": 49200 }, { - "epoch": 0.99, - "learning_rate": 3.2970090424298635e-05, - "loss": 3.2899, - "step": 2850 + "epoch": 1.46, + "learning_rate": 3.0300065986733196e-05, + "loss": 3.0955, + "step": 49300 }, { - "epoch": 1.0, - "eval_gen_len": 19.3457, - "eval_loss": 3.032784938812256, - "eval_rouge1": 16.185, - "eval_rouge2": 4.0368, - "eval_rougeL": 12.9047, - "eval_rougeLsum": 14.0748, - "eval_runtime": 251.1607, - "eval_samples_per_second": 9.249, - "eval_steps_per_second": 1.159, - "step": 2875 + "epoch": 1.46, + "learning_rate": 3.024218288744053e-05, + "loss": 3.1154, + "step": 49400 }, { - "epoch": 1.01, - "learning_rate": 3.3549733364247625e-05, - "loss": 3.159, - "step": 2900 + "epoch": 1.46, + "learning_rate": 3.0184299788147856e-05, + "loss": 3.0906, + "step": 49500 }, { - "epoch": 1.03, - "learning_rate": 3.4129376304196616e-05, - "loss": 3.1081, - "step": 2950 + "epoch": 1.46, + "learning_rate": 3.0126416688855193e-05, + "loss": 3.0599, + "step": 49600 }, { - "epoch": 1.04, - "learning_rate": 3.470901924414561e-05, - "loss": 3.085, - "step": 3000 + "epoch": 1.47, + "learning_rate": 3.006853358956252e-05, + "loss": 3.0214, + "step": 49700 }, { - "epoch": 1.06, - "learning_rate": 3.5288662184094604e-05, - "loss": 3.0179, - "step": 3050 + "epoch": 1.47, + "learning_rate": 3.0010650490269853e-05, + "loss": 3.0938, + "step": 49800 }, { - "epoch": 1.08, - "learning_rate": 3.586830512404359e-05, - "loss": 3.0466, - "step": 3100 + "epoch": 1.47, + "learning_rate": 2.9953346221970113e-05, + "loss": 3.0698, + "step": 49900 }, { - "epoch": 1.1, - "learning_rate": 3.644794806399258e-05, - "loss": 3.0898, - "step": 3150 + "epoch": 1.48, + "learning_rate": 2.9895463122677443e-05, + "loss": 3.1009, + "step": 50000 }, { - "epoch": 1.11, - "learning_rate": 3.7027591003941576e-05, - "loss": 3.1355, - "step": 3200 + "epoch": 1.48, + "learning_rate": 2.9837580023384776e-05, + "loss": 3.0987, + "step": 50100 }, { - "epoch": 1.13, - "learning_rate": 3.760723394389056e-05, - "loss": 3.0855, - "step": 3250 + "epoch": 1.48, + "learning_rate": 2.9779696924092103e-05, + "loss": 3.0924, + "step": 50200 }, { - "epoch": 1.15, - "learning_rate": 3.818687688383956e-05, - "loss": 3.146, - "step": 3300 + "epoch": 1.48, + "learning_rate": 2.972181382479944e-05, + "loss": 3.0633, + "step": 50300 }, { - "epoch": 1.17, - "learning_rate": 3.876651982378855e-05, - "loss": 3.0868, - "step": 3350 + "epoch": 1.49, + "learning_rate": 2.9663930725506767e-05, + "loss": 3.0363, + "step": 50400 }, { - "epoch": 1.18, - "learning_rate": 3.934616276373754e-05, - "loss": 3.1014, - "step": 3400 + "epoch": 1.49, + "learning_rate": 2.96060476262141e-05, + "loss": 3.0711, + "step": 50500 }, { - "epoch": 1.2, - "learning_rate": 3.992580570368653e-05, - "loss": 3.0761, - "step": 3450 + "epoch": 1.49, + "learning_rate": 2.954816452692143e-05, + "loss": 3.1192, + "step": 50600 }, { - "epoch": 1.22, - "learning_rate": 4.050544864363553e-05, - "loss": 3.121, - "step": 3500 + "epoch": 1.5, + "learning_rate": 2.9490281427628764e-05, + "loss": 3.0765, + "step": 50700 }, { - "epoch": 1.23, - "learning_rate": 4.108509158358451e-05, - "loss": 3.1405, - "step": 3550 + "epoch": 1.5, + "learning_rate": 2.943239832833609e-05, + "loss": 3.1171, + "step": 50800 }, { - "epoch": 1.25, - "learning_rate": 4.166473452353351e-05, - "loss": 3.2121, - "step": 3600 + "epoch": 1.5, + "learning_rate": 2.9374515229043427e-05, + "loss": 3.1118, + "step": 50900 }, { - "epoch": 1.27, - "learning_rate": 4.22443774634825e-05, - "loss": 3.1119, - "step": 3650 + "epoch": 1.51, + "learning_rate": 2.9316632129750754e-05, + "loss": 2.9891, + "step": 51000 }, { - "epoch": 1.29, - "learning_rate": 4.282402040343149e-05, - "loss": 3.1303, - "step": 3700 + "epoch": 1.51, + "learning_rate": 2.9258749030458087e-05, + "loss": 3.1239, + "step": 51100 }, { - "epoch": 1.3, - "learning_rate": 4.340366334338048e-05, - "loss": 3.1476, - "step": 3750 + "epoch": 1.51, + "learning_rate": 2.920086593116542e-05, + "loss": 3.0719, + "step": 51200 }, { - "epoch": 1.32, - "learning_rate": 4.398330628332947e-05, - "loss": 3.1053, - "step": 3800 + "epoch": 1.51, + "learning_rate": 2.914298283187275e-05, + "loss": 3.0945, + "step": 51300 }, { - "epoch": 1.34, - "learning_rate": 4.456294922327846e-05, - "loss": 3.1109, - "step": 3850 + "epoch": 1.52, + "learning_rate": 2.9085099732580084e-05, + "loss": 3.1234, + "step": 51400 }, { - "epoch": 1.36, - "learning_rate": 4.514259216322745e-05, - "loss": 3.2268, - "step": 3900 + "epoch": 1.52, + "learning_rate": 2.9027216633287415e-05, + "loss": 3.1663, + "step": 51500 }, { - "epoch": 1.37, - "learning_rate": 4.572223510317645e-05, - "loss": 3.2088, - "step": 3950 + "epoch": 1.52, + "learning_rate": 2.8969333533994748e-05, + "loss": 3.0325, + "step": 51600 }, { - "epoch": 1.39, - "learning_rate": 4.6301878043125433e-05, - "loss": 3.175, - "step": 4000 + "epoch": 1.53, + "learning_rate": 2.8911450434702075e-05, + "loss": 3.0971, + "step": 51700 }, { - "epoch": 1.41, - "learning_rate": 4.688152098307443e-05, - "loss": 3.1848, - "step": 4050 + "epoch": 1.53, + "learning_rate": 2.8853567335409408e-05, + "loss": 3.0374, + "step": 51800 }, { - "epoch": 1.43, - "learning_rate": 4.744957106422444e-05, - "loss": 3.1789, - "step": 4100 + "epoch": 1.53, + "learning_rate": 2.879568423611674e-05, + "loss": 3.0485, + "step": 51900 }, { - "epoch": 1.44, - "learning_rate": 4.8029214004173436e-05, - "loss": 3.2485, - "step": 4150 + "epoch": 1.54, + "learning_rate": 2.8737801136824072e-05, + "loss": 3.0894, + "step": 52000 }, { - "epoch": 1.46, - "learning_rate": 4.860885694412242e-05, - "loss": 3.1635, - "step": 4200 + "epoch": 1.54, + "learning_rate": 2.8679918037531402e-05, + "loss": 3.1166, + "step": 52100 }, { - "epoch": 1.48, - "learning_rate": 4.918849988407142e-05, - "loss": 3.1433, - "step": 4250 + "epoch": 1.54, + "learning_rate": 2.8622034938238735e-05, + "loss": 3.0091, + "step": 52200 }, { - "epoch": 1.5, - "learning_rate": 4.976814282402041e-05, - "loss": 3.3169, - "step": 4300 + "epoch": 1.54, + "learning_rate": 2.856415183894607e-05, + "loss": 3.0695, + "step": 52300 }, { - "epoch": 1.51, - "learning_rate": 4.993861766992675e-05, - "loss": 3.2209, - "step": 4350 + "epoch": 1.55, + "learning_rate": 2.8506268739653396e-05, + "loss": 3.0978, + "step": 52400 }, { - "epoch": 1.53, - "learning_rate": 4.9836313786471336e-05, - "loss": 3.1356, - "step": 4400 + "epoch": 1.55, + "learning_rate": 2.844838564036073e-05, + "loss": 3.0559, + "step": 52500 }, { "epoch": 1.55, - "learning_rate": 4.973400990301592e-05, - "loss": 3.1549, - "step": 4450 + "learning_rate": 2.839050254106806e-05, + "loss": 3.0492, + "step": 52600 }, { - "epoch": 1.57, - "learning_rate": 4.96317060195605e-05, - "loss": 3.161, - "step": 4500 + "epoch": 1.56, + "learning_rate": 2.8332619441775393e-05, + "loss": 3.0304, + "step": 52700 }, { - "epoch": 1.58, - "learning_rate": 4.952940213610509e-05, - "loss": 3.2009, - "step": 4550 + "epoch": 1.56, + "learning_rate": 2.8274736342482723e-05, + "loss": 3.1342, + "step": 52800 }, { - "epoch": 1.6, - "learning_rate": 4.9427098252649675e-05, - "loss": 3.2533, - "step": 4600 + "epoch": 1.56, + "learning_rate": 2.8216853243190056e-05, + "loss": 3.076, + "step": 52900 }, { - "epoch": 1.62, - "learning_rate": 4.9324794369194254e-05, - "loss": 3.1573, - "step": 4650 + "epoch": 1.56, + "learning_rate": 2.8158970143897383e-05, + "loss": 2.9751, + "step": 53000 }, { - "epoch": 1.63, - "learning_rate": 4.922249048573884e-05, - "loss": 3.1616, - "step": 4700 + "epoch": 1.57, + "learning_rate": 2.8101087044604717e-05, + "loss": 3.0754, + "step": 53100 }, { - "epoch": 1.65, - "learning_rate": 4.912018660228343e-05, - "loss": 3.1886, - "step": 4750 + "epoch": 1.57, + "learning_rate": 2.8043203945312047e-05, + "loss": 3.0723, + "step": 53200 }, { - "epoch": 1.67, - "learning_rate": 4.901788271882801e-05, - "loss": 3.1398, - "step": 4800 + "epoch": 1.57, + "learning_rate": 2.798532084601938e-05, + "loss": 3.0367, + "step": 53300 }, { - "epoch": 1.69, - "learning_rate": 4.891557883537259e-05, - "loss": 3.2065, - "step": 4850 + "epoch": 1.58, + "learning_rate": 2.7927437746726714e-05, + "loss": 3.0745, + "step": 53400 }, { - "epoch": 1.7, - "learning_rate": 4.881327495191718e-05, - "loss": 3.151, - "step": 4900 + "epoch": 1.58, + "learning_rate": 2.7869554647434044e-05, + "loss": 3.0561, + "step": 53500 }, { - "epoch": 1.72, - "learning_rate": 4.8710971068461766e-05, - "loss": 3.1184, - "step": 4950 + "epoch": 1.58, + "learning_rate": 2.7811671548141377e-05, + "loss": 3.1498, + "step": 53600 }, { - "epoch": 1.74, - "learning_rate": 4.8608667185006345e-05, - "loss": 3.1874, - "step": 5000 + "epoch": 1.59, + "learning_rate": 2.7753788448848704e-05, + "loss": 3.0278, + "step": 53700 }, { - "epoch": 1.76, - "learning_rate": 4.8506363301550925e-05, - "loss": 3.1453, - "step": 5050 + "epoch": 1.59, + "learning_rate": 2.769590534955604e-05, + "loss": 3.0985, + "step": 53800 }, { - "epoch": 1.77, - "learning_rate": 4.840405941809552e-05, - "loss": 3.1359, - "step": 5100 + "epoch": 1.59, + "learning_rate": 2.7638022250263367e-05, + "loss": 3.0816, + "step": 53900 }, { - "epoch": 1.79, - "learning_rate": 4.83017555346401e-05, - "loss": 3.1124, - "step": 5150 + "epoch": 1.59, + "learning_rate": 2.75801391509707e-05, + "loss": 3.0399, + "step": 54000 }, { - "epoch": 1.81, - "learning_rate": 4.819945165118468e-05, - "loss": 3.2292, - "step": 5200 + "epoch": 1.6, + "learning_rate": 2.752283488267096e-05, + "loss": 3.0389, + "step": 54100 }, { - "epoch": 1.83, - "learning_rate": 4.809714776772927e-05, - "loss": 3.0806, - "step": 5250 + "epoch": 1.6, + "learning_rate": 2.746495178337829e-05, + "loss": 3.0626, + "step": 54200 }, { - "epoch": 1.84, - "learning_rate": 4.799484388427385e-05, - "loss": 3.163, - "step": 5300 + "epoch": 1.6, + "learning_rate": 2.7407068684085624e-05, + "loss": 3.0901, + "step": 54300 }, { - "epoch": 1.86, - "learning_rate": 4.789254000081843e-05, - "loss": 3.1863, - "step": 5350 + "epoch": 1.61, + "learning_rate": 2.734918558479295e-05, + "loss": 3.0882, + "step": 54400 }, { - "epoch": 1.88, - "learning_rate": 4.7790236117363015e-05, - "loss": 3.1359, - "step": 5400 + "epoch": 1.61, + "learning_rate": 2.7291302485500288e-05, + "loss": 3.1266, + "step": 54500 }, { - "epoch": 1.9, - "learning_rate": 4.76879322339076e-05, - "loss": 3.1654, - "step": 5450 + "epoch": 1.61, + "learning_rate": 2.7233419386207614e-05, + "loss": 3.0028, + "step": 54600 }, { - "epoch": 1.91, - "learning_rate": 4.758562835045218e-05, - "loss": 3.0909, - "step": 5500 + "epoch": 1.61, + "learning_rate": 2.7175536286914948e-05, + "loss": 3.0538, + "step": 54700 }, { - "epoch": 1.93, - "learning_rate": 4.748332446699677e-05, - "loss": 3.1349, - "step": 5550 + "epoch": 1.62, + "learning_rate": 2.7117653187622278e-05, + "loss": 3.0825, + "step": 54800 }, { - "epoch": 1.95, - "learning_rate": 4.7381020583541354e-05, - "loss": 3.0526, - "step": 5600 + "epoch": 1.62, + "learning_rate": 2.705977008832961e-05, + "loss": 3.0424, + "step": 54900 }, { - "epoch": 1.97, - "learning_rate": 4.727871670008594e-05, - "loss": 3.2346, - "step": 5650 + "epoch": 1.62, + "learning_rate": 2.7001886989036938e-05, + "loss": 3.1286, + "step": 55000 }, { - "epoch": 1.98, - "learning_rate": 4.717641281663052e-05, - "loss": 3.2078, - "step": 5700 + "epoch": 1.63, + "learning_rate": 2.6944003889744275e-05, + "loss": 3.1161, + "step": 55100 }, { - "epoch": 2.0, - "learning_rate": 4.7074108933175106e-05, - "loss": 3.0916, - "step": 5750 + "epoch": 1.63, + "learning_rate": 2.688612079045161e-05, + "loss": 3.0712, + "step": 55200 }, { - "epoch": 2.0, - "eval_gen_len": 19.3427, - "eval_loss": 3.0548317432403564, - "eval_rouge1": 16.2962, - "eval_rouge2": 3.9567, - "eval_rougeL": 13.0426, - "eval_rougeLsum": 14.2023, - "eval_runtime": 251.2192, - "eval_samples_per_second": 9.247, - "eval_steps_per_second": 1.158, - "step": 5750 + "epoch": 1.63, + "learning_rate": 2.6828237691158935e-05, + "loss": 3.0373, + "step": 55300 }, { - "epoch": 2.02, - "learning_rate": 4.697180504971969e-05, - "loss": 2.7401, - "step": 5800 + "epoch": 1.64, + "learning_rate": 2.677035459186627e-05, + "loss": 3.0883, + "step": 55400 }, { - "epoch": 2.03, - "learning_rate": 4.686950116626427e-05, - "loss": 2.6917, - "step": 5850 + "epoch": 1.64, + "learning_rate": 2.67124714925736e-05, + "loss": 3.041, + "step": 55500 }, { - "epoch": 2.05, - "learning_rate": 4.676719728280886e-05, - "loss": 2.7485, - "step": 5900 + "epoch": 1.64, + "learning_rate": 2.6654588393280932e-05, + "loss": 3.0623, + "step": 55600 }, { - "epoch": 2.07, - "learning_rate": 4.6664893399353445e-05, - "loss": 2.8236, - "step": 5950 + "epoch": 1.64, + "learning_rate": 2.6596705293988262e-05, + "loss": 3.1202, + "step": 55700 }, { - "epoch": 2.09, - "learning_rate": 4.6562589515898024e-05, - "loss": 2.7402, - "step": 6000 + "epoch": 1.65, + "learning_rate": 2.6538822194695596e-05, + "loss": 3.0211, + "step": 55800 }, { - "epoch": 2.1, - "learning_rate": 4.646028563244261e-05, - "loss": 2.7964, - "step": 6050 + "epoch": 1.65, + "learning_rate": 2.6480939095402923e-05, + "loss": 3.1307, + "step": 55900 }, { - "epoch": 2.12, - "learning_rate": 4.63579817489872e-05, - "loss": 2.6982, - "step": 6100 + "epoch": 1.65, + "learning_rate": 2.6423055996110256e-05, + "loss": 3.0234, + "step": 56000 }, { - "epoch": 2.14, - "learning_rate": 4.6255677865531776e-05, - "loss": 2.7642, - "step": 6150 + "epoch": 1.66, + "learning_rate": 2.6365172896817586e-05, + "loss": 3.013, + "step": 56100 }, { - "epoch": 2.16, - "learning_rate": 4.615337398207636e-05, - "loss": 2.7334, - "step": 6200 + "epoch": 1.66, + "learning_rate": 2.6307868628517846e-05, + "loss": 3.0643, + "step": 56200 }, { - "epoch": 2.17, - "learning_rate": 4.605107009862095e-05, - "loss": 2.8078, - "step": 6250 + "epoch": 1.66, + "learning_rate": 2.624998552922518e-05, + "loss": 3.046, + "step": 56300 }, { - "epoch": 2.19, - "learning_rate": 4.594876621516553e-05, - "loss": 2.7167, - "step": 6300 + "epoch": 1.66, + "learning_rate": 2.619210242993251e-05, + "loss": 2.9987, + "step": 56400 }, { - "epoch": 2.21, - "learning_rate": 4.5846462331710115e-05, - "loss": 2.8431, - "step": 6350 + "epoch": 1.67, + "learning_rate": 2.6134219330639843e-05, + "loss": 3.1, + "step": 56500 }, { - "epoch": 2.23, - "learning_rate": 4.57441584482547e-05, - "loss": 2.7301, - "step": 6400 + "epoch": 1.67, + "learning_rate": 2.6076915062340102e-05, + "loss": 3.0732, + "step": 56600 }, { - "epoch": 2.24, - "learning_rate": 4.564185456479928e-05, - "loss": 2.8079, - "step": 6450 + "epoch": 1.67, + "learning_rate": 2.601903196304743e-05, + "loss": 2.996, + "step": 56700 }, { - "epoch": 2.26, - "learning_rate": 4.553955068134387e-05, - "loss": 2.8141, - "step": 6500 + "epoch": 1.68, + "learning_rate": 2.5961148863754763e-05, + "loss": 2.9735, + "step": 56800 }, { - "epoch": 2.28, - "learning_rate": 4.5437246797888446e-05, - "loss": 2.8138, - "step": 6550 + "epoch": 1.68, + "learning_rate": 2.5903265764462093e-05, + "loss": 3.1598, + "step": 56900 }, { - "epoch": 2.3, - "learning_rate": 4.533494291443303e-05, - "loss": 2.8127, - "step": 6600 + "epoch": 1.68, + "learning_rate": 2.5845382665169426e-05, + "loss": 3.0743, + "step": 57000 }, { - "epoch": 2.31, - "learning_rate": 4.523263903097762e-05, - "loss": 2.8158, - "step": 6650 + "epoch": 1.69, + "learning_rate": 2.5787499565876756e-05, + "loss": 3.0798, + "step": 57100 }, { - "epoch": 2.33, - "learning_rate": 4.51303351475222e-05, - "loss": 2.7652, - "step": 6700 + "epoch": 1.69, + "learning_rate": 2.572961646658409e-05, + "loss": 3.0895, + "step": 57200 }, { - "epoch": 2.35, - "learning_rate": 4.5028031264066785e-05, - "loss": 2.8055, - "step": 6750 + "epoch": 1.69, + "learning_rate": 2.5671733367291416e-05, + "loss": 3.051, + "step": 57300 }, { - "epoch": 2.37, - "learning_rate": 4.492572738061137e-05, - "loss": 2.7853, - "step": 6800 + "epoch": 1.69, + "learning_rate": 2.561385026799875e-05, + "loss": 3.0544, + "step": 57400 }, { - "epoch": 2.38, - "learning_rate": 4.482342349715595e-05, - "loss": 2.7547, - "step": 6850 + "epoch": 1.7, + "learning_rate": 2.555596716870608e-05, + "loss": 3.0848, + "step": 57500 }, { - "epoch": 2.4, - "learning_rate": 4.4721119613700544e-05, - "loss": 2.7961, - "step": 6900 + "epoch": 1.7, + "learning_rate": 2.5498084069413413e-05, + "loss": 3.0787, + "step": 57600 }, { - "epoch": 2.42, - "learning_rate": 4.4618815730245123e-05, - "loss": 2.817, - "step": 6950 + "epoch": 1.7, + "learning_rate": 2.5440779801113673e-05, + "loss": 3.1021, + "step": 57700 }, { - "epoch": 2.43, - "learning_rate": 4.45165118467897e-05, - "loss": 2.8016, - "step": 7000 + "epoch": 1.71, + "learning_rate": 2.5382896701821003e-05, + "loss": 3.0226, + "step": 57800 }, { - "epoch": 2.45, - "learning_rate": 4.441420796333429e-05, - "loss": 2.7794, - "step": 7050 + "epoch": 1.71, + "learning_rate": 2.5325013602528337e-05, + "loss": 3.0637, + "step": 57900 }, { - "epoch": 2.47, - "learning_rate": 4.4311904079878876e-05, - "loss": 2.8091, - "step": 7100 + "epoch": 1.71, + "learning_rate": 2.5267130503235663e-05, + "loss": 3.1298, + "step": 58000 }, { - "epoch": 2.49, - "learning_rate": 4.4209600196423455e-05, - "loss": 2.8352, - "step": 7150 + "epoch": 1.72, + "learning_rate": 2.5209247403942997e-05, + "loss": 3.0767, + "step": 58100 }, { - "epoch": 2.5, - "learning_rate": 4.410729631296804e-05, - "loss": 2.8756, - "step": 7200 + "epoch": 1.72, + "learning_rate": 2.5151364304650327e-05, + "loss": 2.9812, + "step": 58200 }, { - "epoch": 2.52, - "learning_rate": 4.400499242951263e-05, - "loss": 2.7531, - "step": 7250 + "epoch": 1.72, + "learning_rate": 2.509348120535766e-05, + "loss": 3.0861, + "step": 58300 }, { - "epoch": 2.54, - "learning_rate": 4.390268854605721e-05, - "loss": 2.8505, - "step": 7300 + "epoch": 1.72, + "learning_rate": 2.5035598106064994e-05, + "loss": 3.1261, + "step": 58400 }, { - "epoch": 2.56, - "learning_rate": 4.3800384662601794e-05, - "loss": 2.8014, - "step": 7350 + "epoch": 1.73, + "learning_rate": 2.4977715006772324e-05, + "loss": 3.0393, + "step": 58500 }, { - "epoch": 2.57, - "learning_rate": 4.369808077914638e-05, - "loss": 2.7784, - "step": 7400 + "epoch": 1.73, + "learning_rate": 2.4919831907479654e-05, + "loss": 3.0622, + "step": 58600 }, { - "epoch": 2.59, - "learning_rate": 4.3595776895690966e-05, - "loss": 2.7593, - "step": 7450 + "epoch": 1.73, + "learning_rate": 2.4861948808186984e-05, + "loss": 3.0922, + "step": 58700 }, { - "epoch": 2.61, - "learning_rate": 4.3493473012235546e-05, - "loss": 2.7198, - "step": 7500 + "epoch": 1.74, + "learning_rate": 2.480406570889432e-05, + "loss": 3.0503, + "step": 58800 }, { - "epoch": 2.63, - "learning_rate": 4.339321520644924e-05, - "loss": 2.7886, - "step": 7550 + "epoch": 1.74, + "learning_rate": 2.474618260960165e-05, + "loss": 3.0453, + "step": 58900 }, { - "epoch": 2.64, - "learning_rate": 4.329091132299382e-05, - "loss": 2.7781, - "step": 7600 + "epoch": 1.74, + "learning_rate": 2.468829951030898e-05, + "loss": 3.0043, + "step": 59000 }, { - "epoch": 2.66, - "learning_rate": 4.318860743953841e-05, - "loss": 2.7815, - "step": 7650 + "epoch": 1.74, + "learning_rate": 2.4630416411016315e-05, + "loss": 3.0512, + "step": 59100 }, { - "epoch": 2.68, - "learning_rate": 4.3086303556082994e-05, - "loss": 2.8369, - "step": 7700 + "epoch": 1.75, + "learning_rate": 2.4572533311723645e-05, + "loss": 3.0278, + "step": 59200 }, { - "epoch": 2.7, - "learning_rate": 4.2983999672627574e-05, - "loss": 2.7718, - "step": 7750 + "epoch": 1.75, + "learning_rate": 2.4514650212430975e-05, + "loss": 3.0435, + "step": 59300 }, { - "epoch": 2.71, - "learning_rate": 4.288169578917216e-05, - "loss": 2.7706, - "step": 7800 + "epoch": 1.75, + "learning_rate": 2.445676711313831e-05, + "loss": 3.0182, + "step": 59400 }, { - "epoch": 2.73, - "learning_rate": 4.277939190571674e-05, - "loss": 2.7951, - "step": 7850 + "epoch": 1.76, + "learning_rate": 2.439888401384564e-05, + "loss": 3.013, + "step": 59500 }, { - "epoch": 2.75, - "learning_rate": 4.2677088022261326e-05, - "loss": 2.8291, - "step": 7900 + "epoch": 1.76, + "learning_rate": 2.434100091455297e-05, + "loss": 3.1001, + "step": 59600 }, { - "epoch": 2.77, - "learning_rate": 4.257478413880591e-05, - "loss": 2.8511, - "step": 7950 + "epoch": 1.76, + "learning_rate": 2.4283117815260302e-05, + "loss": 3.0871, + "step": 59700 }, { - "epoch": 2.78, - "learning_rate": 4.247248025535049e-05, - "loss": 2.8155, - "step": 8000 + "epoch": 1.77, + "learning_rate": 2.4225234715967632e-05, + "loss": 3.0644, + "step": 59800 }, { - "epoch": 2.8, - "learning_rate": 4.2370176371895085e-05, - "loss": 2.8248, - "step": 8050 + "epoch": 1.77, + "learning_rate": 2.4167351616674962e-05, + "loss": 3.0736, + "step": 59900 }, { - "epoch": 2.82, - "learning_rate": 4.2267872488439664e-05, - "loss": 2.8097, - "step": 8100 + "epoch": 1.77, + "learning_rate": 2.4109468517382296e-05, + "loss": 3.0558, + "step": 60000 }, { - "epoch": 2.83, - "learning_rate": 4.2165568604984244e-05, - "loss": 2.8237, - "step": 8150 + "epoch": 1.77, + "learning_rate": 2.405158541808963e-05, + "loss": 3.0976, + "step": 60100 }, { - "epoch": 2.85, - "learning_rate": 4.206326472152883e-05, - "loss": 2.8494, - "step": 8200 + "epoch": 1.78, + "learning_rate": 2.399370231879696e-05, + "loss": 3.0441, + "step": 60200 }, { - "epoch": 2.87, - "learning_rate": 4.1960960838073416e-05, - "loss": 2.8382, - "step": 8250 + "epoch": 1.78, + "learning_rate": 2.393581921950429e-05, + "loss": 3.0623, + "step": 60300 }, { - "epoch": 2.89, - "learning_rate": 4.1858656954617996e-05, - "loss": 2.8271, - "step": 8300 + "epoch": 1.78, + "learning_rate": 2.3877936120211623e-05, + "loss": 3.0432, + "step": 60400 }, { - "epoch": 2.9, - "learning_rate": 4.175635307116258e-05, - "loss": 2.8145, - "step": 8350 + "epoch": 1.79, + "learning_rate": 2.3820053020918953e-05, + "loss": 3.031, + "step": 60500 }, { - "epoch": 2.92, - "learning_rate": 4.165404918770717e-05, - "loss": 2.8698, - "step": 8400 + "epoch": 1.79, + "learning_rate": 2.3762169921626283e-05, + "loss": 3.0308, + "step": 60600 }, { - "epoch": 2.94, - "learning_rate": 4.155174530425175e-05, - "loss": 2.8377, - "step": 8450 + "epoch": 1.79, + "learning_rate": 2.3704286822333617e-05, + "loss": 3.0621, + "step": 60700 }, { - "epoch": 2.96, - "learning_rate": 4.1449441420796334e-05, - "loss": 2.7838, - "step": 8500 + "epoch": 1.79, + "learning_rate": 2.3646403723040947e-05, + "loss": 2.9949, + "step": 60800 }, { - "epoch": 2.97, - "learning_rate": 4.134713753734092e-05, - "loss": 2.7709, - "step": 8550 + "epoch": 1.8, + "learning_rate": 2.3588520623748277e-05, + "loss": 3.0625, + "step": 60900 }, { - "epoch": 2.99, - "learning_rate": 4.124483365388551e-05, - "loss": 2.8345, - "step": 8600 + "epoch": 1.8, + "learning_rate": 2.353063752445561e-05, + "loss": 3.0175, + "step": 61000 }, { - "epoch": 3.0, - "eval_gen_len": 19.9707, - "eval_loss": 3.0645270347595215, - "eval_rouge1": 16.4597, - "eval_rouge2": 4.2017, - "eval_rougeL": 13.3787, - "eval_rougeLsum": 14.5527, - "eval_runtime": 250.7875, - "eval_samples_per_second": 9.263, - "eval_steps_per_second": 1.16, - "step": 8625 - }, - { - "epoch": 3.01, - "learning_rate": 4.114252977043009e-05, - "loss": 2.5888, - "step": 8650 + "epoch": 1.8, + "learning_rate": 2.3472754425162944e-05, + "loss": 2.9987, + "step": 61100 }, { - "epoch": 3.03, - "learning_rate": 4.104022588697467e-05, - "loss": 2.3788, - "step": 8700 + "epoch": 1.81, + "learning_rate": 2.3414871325870274e-05, + "loss": 2.998, + "step": 61200 }, { - "epoch": 3.04, - "learning_rate": 4.093792200351926e-05, - "loss": 2.4263, - "step": 8750 + "epoch": 1.81, + "learning_rate": 2.3356988226577607e-05, + "loss": 3.0248, + "step": 61300 }, { - "epoch": 3.06, - "learning_rate": 4.083561812006384e-05, - "loss": 2.3851, - "step": 8800 + "epoch": 1.81, + "learning_rate": 2.3299105127284938e-05, + "loss": 3.0506, + "step": 61400 }, { - "epoch": 3.08, - "learning_rate": 4.0733314236608425e-05, - "loss": 2.378, - "step": 8850 + "epoch": 1.82, + "learning_rate": 2.3241222027992268e-05, + "loss": 3.1104, + "step": 61500 }, { - "epoch": 3.1, - "learning_rate": 4.063101035315301e-05, - "loss": 2.3572, - "step": 8900 + "epoch": 1.82, + "learning_rate": 2.31833389286996e-05, + "loss": 3.0363, + "step": 61600 }, { - "epoch": 3.11, - "learning_rate": 4.052870646969759e-05, - "loss": 2.4259, - "step": 8950 + "epoch": 1.82, + "learning_rate": 2.312545582940693e-05, + "loss": 3.0894, + "step": 61700 }, { - "epoch": 3.13, - "learning_rate": 4.042640258624217e-05, - "loss": 2.4093, - "step": 9000 + "epoch": 1.82, + "learning_rate": 2.306757273011426e-05, + "loss": 3.0058, + "step": 61800 }, { - "epoch": 3.15, - "learning_rate": 4.0324098702786764e-05, - "loss": 2.3708, - "step": 9050 + "epoch": 1.83, + "learning_rate": 2.300968963082159e-05, + "loss": 2.9939, + "step": 61900 }, { - "epoch": 3.17, - "learning_rate": 4.022179481933134e-05, - "loss": 2.3737, - "step": 9100 + "epoch": 1.83, + "learning_rate": 2.2951806531528925e-05, + "loss": 2.9725, + "step": 62000 }, { - "epoch": 3.18, - "learning_rate": 4.011949093587592e-05, - "loss": 2.4567, - "step": 9150 + "epoch": 1.83, + "learning_rate": 2.2893923432236255e-05, + "loss": 3.0223, + "step": 62100 }, { - "epoch": 3.2, - "learning_rate": 4.0017187052420516e-05, - "loss": 2.4837, - "step": 9200 + "epoch": 1.84, + "learning_rate": 2.283604033294359e-05, + "loss": 3.0144, + "step": 62200 }, { - "epoch": 3.22, - "learning_rate": 3.9914883168965095e-05, - "loss": 2.4936, - "step": 9250 + "epoch": 1.84, + "learning_rate": 2.2778157233650922e-05, + "loss": 3.0021, + "step": 62300 }, { - "epoch": 3.23, - "learning_rate": 3.981257928550968e-05, - "loss": 2.4992, - "step": 9300 + "epoch": 1.84, + "learning_rate": 2.2720274134358252e-05, + "loss": 2.9874, + "step": 62400 }, { - "epoch": 3.25, - "learning_rate": 3.971027540205426e-05, - "loss": 2.4514, - "step": 9350 + "epoch": 1.85, + "learning_rate": 2.2662391035065582e-05, + "loss": 3.0061, + "step": 62500 }, { - "epoch": 3.27, - "learning_rate": 3.960797151859885e-05, - "loss": 2.4341, - "step": 9400 + "epoch": 1.85, + "learning_rate": 2.2604507935772916e-05, + "loss": 2.9589, + "step": 62600 }, { - "epoch": 3.29, - "learning_rate": 3.9505667635143434e-05, - "loss": 2.4617, - "step": 9450 + "epoch": 1.85, + "learning_rate": 2.2546624836480246e-05, + "loss": 3.0054, + "step": 62700 }, { - "epoch": 3.3, - "learning_rate": 3.940336375168801e-05, - "loss": 2.489, - "step": 9500 + "epoch": 1.85, + "learning_rate": 2.2488741737187576e-05, + "loss": 3.0209, + "step": 62800 }, { - "epoch": 3.32, - "learning_rate": 3.93010598682326e-05, - "loss": 2.4343, - "step": 9550 + "epoch": 1.86, + "learning_rate": 2.243085863789491e-05, + "loss": 3.0615, + "step": 62900 }, { - "epoch": 3.34, - "learning_rate": 3.9198755984777186e-05, - "loss": 2.4264, - "step": 9600 + "epoch": 1.86, + "learning_rate": 2.237297553860224e-05, + "loss": 2.9817, + "step": 63000 }, { - "epoch": 3.36, - "learning_rate": 3.9096452101321766e-05, - "loss": 2.4492, - "step": 9650 + "epoch": 1.86, + "learning_rate": 2.231509243930957e-05, + "loss": 3.0172, + "step": 63100 }, { - "epoch": 3.37, - "learning_rate": 3.899414821786635e-05, - "loss": 2.4625, - "step": 9700 + "epoch": 1.87, + "learning_rate": 2.2257209340016903e-05, + "loss": 3.0459, + "step": 63200 }, { - "epoch": 3.39, - "learning_rate": 3.889184433441094e-05, - "loss": 2.4601, - "step": 9750 + "epoch": 1.87, + "learning_rate": 2.2199326240724233e-05, + "loss": 3.0939, + "step": 63300 }, { - "epoch": 3.41, - "learning_rate": 3.878954045095552e-05, - "loss": 2.4262, - "step": 9800 + "epoch": 1.87, + "learning_rate": 2.2141443141431567e-05, + "loss": 3.0101, + "step": 63400 }, { - "epoch": 3.43, - "learning_rate": 3.8687236567500104e-05, - "loss": 2.4918, - "step": 9850 + "epoch": 1.87, + "learning_rate": 2.2083560042138897e-05, + "loss": 3.0242, + "step": 63500 }, { - "epoch": 3.44, - "learning_rate": 3.858493268404469e-05, - "loss": 2.4677, - "step": 9900 + "epoch": 1.88, + "learning_rate": 2.202567694284623e-05, + "loss": 3.0988, + "step": 63600 }, { - "epoch": 3.46, - "learning_rate": 3.848262880058927e-05, - "loss": 2.4299, - "step": 9950 + "epoch": 1.88, + "learning_rate": 2.196779384355356e-05, + "loss": 3.089, + "step": 63700 }, { - "epoch": 3.48, - "learning_rate": 3.8380324917133856e-05, - "loss": 2.4229, - "step": 10000 + "epoch": 1.88, + "learning_rate": 2.190991074426089e-05, + "loss": 3.1095, + "step": 63800 }, { - "epoch": 3.5, - "learning_rate": 3.827802103367844e-05, - "loss": 2.4598, - "step": 10050 + "epoch": 1.89, + "learning_rate": 2.185260647596115e-05, + "loss": 3.0332, + "step": 63900 }, { - "epoch": 3.51, - "learning_rate": 3.817571715022302e-05, - "loss": 2.4382, - "step": 10100 + "epoch": 1.89, + "learning_rate": 2.1794723376668483e-05, + "loss": 3.0994, + "step": 64000 }, { - "epoch": 3.53, - "learning_rate": 3.807341326676761e-05, - "loss": 2.4227, - "step": 10150 + "epoch": 1.89, + "learning_rate": 2.1736840277375814e-05, + "loss": 3.0328, + "step": 64100 }, { - "epoch": 3.55, - "learning_rate": 3.7971109383312195e-05, - "loss": 2.4821, - "step": 10200 + "epoch": 1.9, + "learning_rate": 2.1678957178083144e-05, + "loss": 3.0795, + "step": 64200 }, { - "epoch": 3.57, - "learning_rate": 3.7868805499856774e-05, - "loss": 2.4532, - "step": 10250 + "epoch": 1.9, + "learning_rate": 2.1621074078790477e-05, + "loss": 3.0247, + "step": 64300 }, { - "epoch": 3.58, - "learning_rate": 3.776650161640136e-05, - "loss": 2.4879, - "step": 10300 + "epoch": 1.9, + "learning_rate": 2.1563190979497807e-05, + "loss": 3.0352, + "step": 64400 }, { - "epoch": 3.6, - "learning_rate": 3.766419773294595e-05, - "loss": 2.4945, - "step": 10350 + "epoch": 1.9, + "learning_rate": 2.1505307880205137e-05, + "loss": 3.0503, + "step": 64500 }, { - "epoch": 3.62, - "learning_rate": 3.7561893849490526e-05, - "loss": 2.5232, - "step": 10400 + "epoch": 1.91, + "learning_rate": 2.144742478091247e-05, + "loss": 3.0047, + "step": 64600 }, { - "epoch": 3.63, - "learning_rate": 3.745958996603511e-05, - "loss": 2.5075, - "step": 10450 + "epoch": 1.91, + "learning_rate": 2.13895416816198e-05, + "loss": 3.0388, + "step": 64700 }, { - "epoch": 3.65, - "learning_rate": 3.735728608257969e-05, - "loss": 2.4174, - "step": 10500 + "epoch": 1.91, + "learning_rate": 2.133165858232713e-05, + "loss": 3.0377, + "step": 64800 }, { - "epoch": 3.67, - "learning_rate": 3.7254982199124285e-05, - "loss": 2.4856, - "step": 10550 + "epoch": 1.92, + "learning_rate": 2.1273775483034465e-05, + "loss": 3.0398, + "step": 64900 }, { - "epoch": 3.69, - "learning_rate": 3.7152678315668865e-05, - "loss": 2.4594, - "step": 10600 + "epoch": 1.92, + "learning_rate": 2.1215892383741795e-05, + "loss": 3.0546, + "step": 65000 }, { - "epoch": 3.7, - "learning_rate": 3.7050374432213444e-05, - "loss": 2.4923, - "step": 10650 + "epoch": 1.92, + "learning_rate": 2.1158009284449125e-05, + "loss": 3.019, + "step": 65100 }, { - "epoch": 3.72, - "learning_rate": 3.694807054875804e-05, - "loss": 2.3824, - "step": 10700 + "epoch": 1.92, + "learning_rate": 2.110012618515646e-05, + "loss": 3.0192, + "step": 65200 }, { - "epoch": 3.74, - "learning_rate": 3.684576666530262e-05, - "loss": 2.4486, - "step": 10750 + "epoch": 1.93, + "learning_rate": 2.1042243085863792e-05, + "loss": 2.9797, + "step": 65300 }, { - "epoch": 3.76, - "learning_rate": 3.67434627818472e-05, - "loss": 2.4735, - "step": 10800 + "epoch": 1.93, + "learning_rate": 2.0984359986571122e-05, + "loss": 3.1248, + "step": 65400 }, { - "epoch": 3.77, - "learning_rate": 3.664115889839179e-05, - "loss": 2.4633, - "step": 10850 + "epoch": 1.93, + "learning_rate": 2.0926476887278455e-05, + "loss": 3.0729, + "step": 65500 }, { - "epoch": 3.79, - "learning_rate": 3.653885501493637e-05, - "loss": 2.5318, - "step": 10900 + "epoch": 1.94, + "learning_rate": 2.0868593787985785e-05, + "loss": 3.0108, + "step": 65600 }, { - "epoch": 3.81, - "learning_rate": 3.643655113148095e-05, - "loss": 2.4771, - "step": 10950 + "epoch": 1.94, + "learning_rate": 2.0810710688693116e-05, + "loss": 2.9739, + "step": 65700 }, { - "epoch": 3.83, - "learning_rate": 3.6334247248025535e-05, - "loss": 2.5253, - "step": 11000 + "epoch": 1.94, + "learning_rate": 2.075282758940045e-05, + "loss": 3.058, + "step": 65800 }, { - "epoch": 3.84, - "learning_rate": 3.623194336457012e-05, - "loss": 2.4974, - "step": 11050 + "epoch": 1.95, + "learning_rate": 2.069494449010778e-05, + "loss": 3.0111, + "step": 65900 }, { - "epoch": 3.86, - "learning_rate": 3.61296394811147e-05, - "loss": 2.4386, - "step": 11100 + "epoch": 1.95, + "learning_rate": 2.063706139081511e-05, + "loss": 3.075, + "step": 66000 }, { - "epoch": 3.88, - "learning_rate": 3.602733559765929e-05, - "loss": 2.4687, - "step": 11150 + "epoch": 1.95, + "learning_rate": 2.0579178291522443e-05, + "loss": 3.0493, + "step": 66100 }, { - "epoch": 3.9, - "learning_rate": 3.5925031714203874e-05, - "loss": 2.4287, - "step": 11200 + "epoch": 1.95, + "learning_rate": 2.0521295192229773e-05, + "loss": 2.9718, + "step": 66200 }, { - "epoch": 3.91, - "learning_rate": 3.582272783074846e-05, - "loss": 2.5498, - "step": 11250 + "epoch": 1.96, + "learning_rate": 2.0463412092937106e-05, + "loss": 3.0471, + "step": 66300 }, { - "epoch": 3.93, - "learning_rate": 3.572042394729304e-05, - "loss": 2.5195, - "step": 11300 + "epoch": 1.96, + "learning_rate": 2.0405528993644436e-05, + "loss": 3.0502, + "step": 66400 }, { - "epoch": 3.95, - "learning_rate": 3.5618120063837626e-05, - "loss": 2.5423, - "step": 11350 + "epoch": 1.96, + "learning_rate": 2.0348803556337622e-05, + "loss": 3.0482, + "step": 66500 }, { - "epoch": 3.97, - "learning_rate": 3.551581618038221e-05, - "loss": 2.4865, - "step": 11400 + "epoch": 1.97, + "learning_rate": 2.0290920457044955e-05, + "loss": 3.0391, + "step": 66600 }, { - "epoch": 3.98, - "learning_rate": 3.541351229692679e-05, - "loss": 2.4827, - "step": 11450 + "epoch": 1.97, + "learning_rate": 2.0233037357752285e-05, + "loss": 2.9687, + "step": 66700 }, { - "epoch": 4.0, - "learning_rate": 3.531120841347138e-05, - "loss": 2.5522, - "step": 11500 + "epoch": 1.97, + "learning_rate": 2.0175154258459616e-05, + "loss": 3.0298, + "step": 66800 }, { - "epoch": 4.0, - "eval_gen_len": 19.9324, - "eval_loss": 3.0988194942474365, - "eval_rouge1": 16.8388, - "eval_rouge2": 4.3742, - "eval_rougeL": 13.5688, - "eval_rougeLsum": 14.7003, - "eval_runtime": 251.1664, - "eval_samples_per_second": 9.249, - "eval_steps_per_second": 1.159, - "step": 11500 + "epoch": 1.97, + "learning_rate": 2.011727115916695e-05, + "loss": 3.0962, + "step": 66900 }, { - "epoch": 4.02, - "learning_rate": 3.5208904530015964e-05, - "loss": 2.1091, - "step": 11550 + "epoch": 1.98, + "learning_rate": 2.005938805987428e-05, + "loss": 3.0824, + "step": 67000 }, { - "epoch": 4.03, - "learning_rate": 3.5106600646560544e-05, - "loss": 2.0875, - "step": 11600 + "epoch": 1.98, + "learning_rate": 2.000150496058161e-05, + "loss": 3.0595, + "step": 67100 }, { - "epoch": 4.05, - "learning_rate": 3.500429676310512e-05, - "loss": 2.1041, - "step": 11650 + "epoch": 1.98, + "learning_rate": 1.9943621861288943e-05, + "loss": 2.9599, + "step": 67200 }, { - "epoch": 4.07, - "learning_rate": 3.4901992879649716e-05, - "loss": 2.1667, - "step": 11700 + "epoch": 1.99, + "learning_rate": 1.9885738761996273e-05, + "loss": 2.9756, + "step": 67300 }, { - "epoch": 4.09, - "learning_rate": 3.4799688996194296e-05, - "loss": 2.0991, - "step": 11750 + "epoch": 1.99, + "learning_rate": 1.9827855662703603e-05, + "loss": 2.9989, + "step": 67400 }, { - "epoch": 4.1, - "learning_rate": 3.469738511273888e-05, - "loss": 2.1036, - "step": 11800 + "epoch": 1.99, + "learning_rate": 1.9769972563410936e-05, + "loss": 3.0489, + "step": 67500 }, { - "epoch": 4.12, - "learning_rate": 3.459508122928347e-05, - "loss": 2.0709, - "step": 11850 + "epoch": 2.0, + "learning_rate": 1.971208946411827e-05, + "loss": 3.0045, + "step": 67600 }, { - "epoch": 4.14, - "learning_rate": 3.449277734582805e-05, - "loss": 2.1313, - "step": 11900 + "epoch": 2.0, + "learning_rate": 1.96542063648256e-05, + "loss": 2.9764, + "step": 67700 }, { - "epoch": 4.16, - "learning_rate": 3.4390473462372634e-05, - "loss": 2.1212, - "step": 11950 + "epoch": 2.0, + "eval_gen_len": 19.9174, + "eval_loss": 3.12784481048584, + "eval_rouge1": 18.6558, + "eval_rouge2": 5.1844, + "eval_rougeL": 15.0939, + "eval_rougeLsum": 16.3367, + "eval_runtime": 747.2706, + "eval_samples_per_second": 10.073, + "eval_steps_per_second": 2.518, + "step": 67750 }, { - "epoch": 4.17, - "learning_rate": 3.428816957891722e-05, - "loss": 2.1679, - "step": 12000 + "epoch": 2.0, + "learning_rate": 1.959632326553293e-05, + "loss": 2.7806, + "step": 67800 }, { - "epoch": 4.19, - "learning_rate": 3.41858656954618e-05, - "loss": 2.1259, - "step": 12050 + "epoch": 2.0, + "learning_rate": 1.9538440166240264e-05, + "loss": 2.6776, + "step": 67900 }, { - "epoch": 4.21, - "learning_rate": 3.408356181200639e-05, - "loss": 2.0973, - "step": 12100 + "epoch": 2.01, + "learning_rate": 1.9480557066947594e-05, + "loss": 2.6714, + "step": 68000 }, { - "epoch": 4.23, - "learning_rate": 3.3981257928550966e-05, - "loss": 2.1457, - "step": 12150 + "epoch": 2.01, + "learning_rate": 1.9422673967654924e-05, + "loss": 2.6082, + "step": 68100 }, { - "epoch": 4.24, - "learning_rate": 3.387895404509555e-05, - "loss": 2.1776, - "step": 12200 + "epoch": 2.01, + "learning_rate": 1.9364790868362257e-05, + "loss": 2.6635, + "step": 68200 }, { - "epoch": 4.26, - "learning_rate": 3.377665016164014e-05, - "loss": 2.202, - "step": 12250 + "epoch": 2.02, + "learning_rate": 1.9306907769069587e-05, + "loss": 2.6254, + "step": 68300 }, { - "epoch": 4.28, - "learning_rate": 3.367639235585383e-05, - "loss": 2.1676, - "step": 12300 + "epoch": 2.02, + "learning_rate": 1.9249024669776918e-05, + "loss": 2.6649, + "step": 68400 }, { - "epoch": 4.3, - "learning_rate": 3.3574088472398414e-05, - "loss": 2.1375, - "step": 12350 + "epoch": 2.02, + "learning_rate": 1.919114157048425e-05, + "loss": 2.6291, + "step": 68500 }, { - "epoch": 4.31, - "learning_rate": 3.3471784588943e-05, - "loss": 2.1265, - "step": 12400 + "epoch": 2.03, + "learning_rate": 1.913325847119158e-05, + "loss": 2.6979, + "step": 68600 }, { - "epoch": 4.33, - "learning_rate": 3.336948070548758e-05, - "loss": 2.1044, - "step": 12450 + "epoch": 2.03, + "learning_rate": 1.907537537189891e-05, + "loss": 2.6635, + "step": 68700 }, { - "epoch": 4.35, - "learning_rate": 3.326717682203217e-05, - "loss": 2.1207, - "step": 12500 + "epoch": 2.03, + "learning_rate": 1.9017492272606248e-05, + "loss": 2.6288, + "step": 68800 }, { - "epoch": 4.37, - "learning_rate": 3.316487293857675e-05, - "loss": 2.1651, - "step": 12550 + "epoch": 2.03, + "learning_rate": 1.8959609173313578e-05, + "loss": 2.6401, + "step": 68900 }, { - "epoch": 4.38, - "learning_rate": 3.306256905512133e-05, - "loss": 2.1117, - "step": 12600 + "epoch": 2.04, + "learning_rate": 1.890172607402091e-05, + "loss": 2.6152, + "step": 69000 }, { - "epoch": 4.4, - "learning_rate": 3.296026517166592e-05, - "loss": 2.1279, - "step": 12650 + "epoch": 2.04, + "learning_rate": 1.8843842974728242e-05, + "loss": 2.6912, + "step": 69100 }, { - "epoch": 4.42, - "learning_rate": 3.2857961288210505e-05, - "loss": 2.1459, - "step": 12700 + "epoch": 2.04, + "learning_rate": 1.8785959875435572e-05, + "loss": 2.6413, + "step": 69200 }, { - "epoch": 4.43, - "learning_rate": 3.2755657404755085e-05, - "loss": 2.1566, - "step": 12750 + "epoch": 2.05, + "learning_rate": 1.8728076776142902e-05, + "loss": 2.6168, + "step": 69300 }, { - "epoch": 4.45, - "learning_rate": 3.265335352129967e-05, - "loss": 2.1036, - "step": 12800 + "epoch": 2.05, + "learning_rate": 1.8670193676850235e-05, + "loss": 2.661, + "step": 69400 }, { - "epoch": 4.47, - "learning_rate": 3.255104963784426e-05, - "loss": 2.1034, - "step": 12850 + "epoch": 2.05, + "learning_rate": 1.8612310577557566e-05, + "loss": 2.6739, + "step": 69500 }, { - "epoch": 4.49, - "learning_rate": 3.244874575438884e-05, - "loss": 2.096, - "step": 12900 + "epoch": 2.05, + "learning_rate": 1.8554427478264896e-05, + "loss": 2.6745, + "step": 69600 }, { - "epoch": 4.5, - "learning_rate": 3.234644187093342e-05, - "loss": 2.123, - "step": 12950 + "epoch": 2.06, + "learning_rate": 1.849654437897223e-05, + "loss": 2.7476, + "step": 69700 }, { - "epoch": 4.52, - "learning_rate": 3.224413798747801e-05, - "loss": 2.1869, - "step": 13000 + "epoch": 2.06, + "learning_rate": 1.843924011067249e-05, + "loss": 2.6416, + "step": 69800 }, { - "epoch": 4.54, - "learning_rate": 3.214183410402259e-05, - "loss": 2.1858, - "step": 13050 + "epoch": 2.06, + "learning_rate": 1.838135701137982e-05, + "loss": 2.6431, + "step": 69900 }, { - "epoch": 4.56, - "learning_rate": 3.2039530220567175e-05, - "loss": 2.2084, - "step": 13100 + "epoch": 2.07, + "learning_rate": 1.832347391208715e-05, + "loss": 2.6663, + "step": 70000 }, { - "epoch": 4.57, - "learning_rate": 3.193722633711176e-05, - "loss": 2.1973, - "step": 13150 + "epoch": 2.07, + "learning_rate": 1.8265590812794482e-05, + "loss": 2.5952, + "step": 70100 }, { - "epoch": 4.59, - "learning_rate": 3.183492245365634e-05, - "loss": 2.1373, - "step": 13200 + "epoch": 2.07, + "learning_rate": 1.8207707713501812e-05, + "loss": 2.6954, + "step": 70200 }, { - "epoch": 4.61, - "learning_rate": 3.173261857020093e-05, - "loss": 2.1881, - "step": 13250 + "epoch": 2.08, + "learning_rate": 1.8149824614209143e-05, + "loss": 2.6513, + "step": 70300 }, { - "epoch": 4.63, - "learning_rate": 3.163031468674551e-05, - "loss": 2.2398, - "step": 13300 + "epoch": 2.08, + "learning_rate": 1.8091941514916476e-05, + "loss": 2.6924, + "step": 70400 }, { - "epoch": 4.64, - "learning_rate": 3.152801080329009e-05, - "loss": 2.1807, - "step": 13350 + "epoch": 2.08, + "learning_rate": 1.8034058415623806e-05, + "loss": 2.7011, + "step": 70500 }, { - "epoch": 4.66, - "learning_rate": 3.142570691983468e-05, - "loss": 2.1782, - "step": 13400 + "epoch": 2.08, + "learning_rate": 1.797617531633114e-05, + "loss": 2.734, + "step": 70600 }, { - "epoch": 4.68, - "learning_rate": 3.132340303637926e-05, - "loss": 2.215, - "step": 13450 + "epoch": 2.09, + "learning_rate": 1.791829221703847e-05, + "loss": 2.6728, + "step": 70700 }, { - "epoch": 4.7, - "learning_rate": 3.1221099152923846e-05, - "loss": 2.131, - "step": 13500 + "epoch": 2.09, + "learning_rate": 1.7860409117745803e-05, + "loss": 2.6891, + "step": 70800 }, { - "epoch": 4.71, - "learning_rate": 3.111879526946843e-05, - "loss": 2.2367, - "step": 13550 + "epoch": 2.09, + "learning_rate": 1.7802526018453133e-05, + "loss": 2.6044, + "step": 70900 }, { - "epoch": 4.73, - "learning_rate": 3.101649138601301e-05, - "loss": 2.2064, - "step": 13600 + "epoch": 2.1, + "learning_rate": 1.7744642919160463e-05, + "loss": 2.7256, + "step": 71000 }, { - "epoch": 4.75, - "learning_rate": 3.09141875025576e-05, - "loss": 2.224, - "step": 13650 + "epoch": 2.1, + "learning_rate": 1.7686759819867797e-05, + "loss": 2.5696, + "step": 71100 }, { - "epoch": 4.77, - "learning_rate": 3.0811883619102184e-05, - "loss": 2.1554, - "step": 13700 + "epoch": 2.1, + "learning_rate": 1.7628876720575127e-05, + "loss": 2.6064, + "step": 71200 }, { - "epoch": 4.78, - "learning_rate": 3.0709579735646764e-05, - "loss": 2.1551, - "step": 13750 + "epoch": 2.1, + "learning_rate": 1.7570993621282457e-05, + "loss": 2.6711, + "step": 71300 }, { - "epoch": 4.8, - "learning_rate": 3.060727585219135e-05, - "loss": 2.1932, - "step": 13800 + "epoch": 2.11, + "learning_rate": 1.751311052198979e-05, + "loss": 2.6586, + "step": 71400 }, { - "epoch": 4.82, - "learning_rate": 3.0504971968735936e-05, - "loss": 2.2028, - "step": 13850 + "epoch": 2.11, + "learning_rate": 1.745522742269712e-05, + "loss": 2.6367, + "step": 71500 }, { - "epoch": 4.83, - "learning_rate": 3.0402668085280516e-05, - "loss": 2.1582, - "step": 13900 + "epoch": 2.11, + "learning_rate": 1.739734432340445e-05, + "loss": 2.5933, + "step": 71600 }, { - "epoch": 4.85, - "learning_rate": 3.0300364201825105e-05, - "loss": 2.2122, - "step": 13950 + "epoch": 2.12, + "learning_rate": 1.7339461224111784e-05, + "loss": 2.7044, + "step": 71700 }, { - "epoch": 4.87, - "learning_rate": 3.0198060318369685e-05, - "loss": 2.2249, - "step": 14000 + "epoch": 2.12, + "learning_rate": 1.7281578124819118e-05, + "loss": 2.6311, + "step": 71800 }, { - "epoch": 4.89, - "learning_rate": 3.0095756434914268e-05, - "loss": 2.1797, - "step": 14050 + "epoch": 2.12, + "learning_rate": 1.7223695025526448e-05, + "loss": 2.6212, + "step": 71900 }, { - "epoch": 4.9, - "learning_rate": 2.9993452551458858e-05, - "loss": 2.186, - "step": 14100 + "epoch": 2.13, + "learning_rate": 1.7165811926233778e-05, + "loss": 2.6314, + "step": 72000 }, { - "epoch": 4.92, - "learning_rate": 2.9891148668003437e-05, - "loss": 2.2225, - "step": 14150 + "epoch": 2.13, + "learning_rate": 1.710792882694111e-05, + "loss": 2.7652, + "step": 72100 }, { - "epoch": 4.94, - "learning_rate": 2.9788844784548027e-05, - "loss": 2.1545, - "step": 14200 + "epoch": 2.13, + "learning_rate": 1.705004572764844e-05, + "loss": 2.6811, + "step": 72200 }, { - "epoch": 4.96, - "learning_rate": 2.9686540901092606e-05, - "loss": 2.2314, - "step": 14250 + "epoch": 2.13, + "learning_rate": 1.6992741459348698e-05, + "loss": 2.6055, + "step": 72300 }, { - "epoch": 4.97, - "learning_rate": 2.958423701763719e-05, - "loss": 2.1955, - "step": 14300 + "epoch": 2.14, + "learning_rate": 1.693485836005603e-05, + "loss": 2.6991, + "step": 72400 }, { - "epoch": 4.99, - "learning_rate": 2.9481933134181776e-05, - "loss": 2.2307, - "step": 14350 + "epoch": 2.14, + "learning_rate": 1.6876975260763365e-05, + "loss": 2.6355, + "step": 72500 }, { - "epoch": 5.0, - "eval_gen_len": 19.8502, - "eval_loss": 3.2058229446411133, - "eval_rouge1": 16.4764, - "eval_rouge2": 4.2906, - "eval_rougeL": 13.3875, - "eval_rougeLsum": 14.5223, - "eval_runtime": 251.2531, - "eval_samples_per_second": 9.246, - "eval_steps_per_second": 1.158, - "step": 14375 + "epoch": 2.14, + "learning_rate": 1.6819092161470695e-05, + "loss": 2.6464, + "step": 72600 }, { - "epoch": 5.01, - "learning_rate": 2.937962925072636e-05, - "loss": 2.0556, - "step": 14400 + "epoch": 2.15, + "learning_rate": 1.6761209062178025e-05, + "loss": 2.6495, + "step": 72700 }, { - "epoch": 5.03, - "learning_rate": 2.927732536727094e-05, - "loss": 1.7699, - "step": 14450 + "epoch": 2.15, + "learning_rate": 1.670332596288536e-05, + "loss": 2.6867, + "step": 72800 }, { - "epoch": 5.04, - "learning_rate": 2.9175021483815528e-05, - "loss": 1.8145, - "step": 14500 + "epoch": 2.15, + "learning_rate": 1.664544286359269e-05, + "loss": 2.6519, + "step": 72900 }, { - "epoch": 5.06, - "learning_rate": 2.907271760036011e-05, - "loss": 1.8895, - "step": 14550 + "epoch": 2.15, + "learning_rate": 1.658755976430002e-05, + "loss": 2.6794, + "step": 73000 }, { - "epoch": 5.08, - "learning_rate": 2.8970413716904694e-05, - "loss": 1.8408, - "step": 14600 + "epoch": 2.16, + "learning_rate": 1.6529676665007352e-05, + "loss": 2.6281, + "step": 73100 }, { - "epoch": 5.1, - "learning_rate": 2.886810983344928e-05, - "loss": 1.8626, - "step": 14650 + "epoch": 2.16, + "learning_rate": 1.6471793565714682e-05, + "loss": 2.6679, + "step": 73200 }, { - "epoch": 5.11, - "learning_rate": 2.8765805949993863e-05, - "loss": 1.8605, - "step": 14700 + "epoch": 2.16, + "learning_rate": 1.6413910466422012e-05, + "loss": 2.6788, + "step": 73300 }, { - "epoch": 5.13, - "learning_rate": 2.8663502066538446e-05, - "loss": 1.8738, - "step": 14750 + "epoch": 2.17, + "learning_rate": 1.6356027367129346e-05, + "loss": 2.6201, + "step": 73400 }, { - "epoch": 5.15, - "learning_rate": 2.8561198183083032e-05, - "loss": 1.8194, - "step": 14800 + "epoch": 2.17, + "learning_rate": 1.629814426783668e-05, + "loss": 2.6763, + "step": 73500 }, { - "epoch": 5.17, - "learning_rate": 2.8458894299627615e-05, - "loss": 1.8558, - "step": 14850 + "epoch": 2.17, + "learning_rate": 1.624026116854401e-05, + "loss": 2.6572, + "step": 73600 }, { - "epoch": 5.18, - "learning_rate": 2.83565904161722e-05, - "loss": 1.8444, - "step": 14900 + "epoch": 2.18, + "learning_rate": 1.6182378069251343e-05, + "loss": 2.6733, + "step": 73700 }, { - "epoch": 5.2, - "learning_rate": 2.8254286532716784e-05, - "loss": 1.8405, - "step": 14950 + "epoch": 2.18, + "learning_rate": 1.6124494969958673e-05, + "loss": 2.6219, + "step": 73800 }, { - "epoch": 5.22, - "learning_rate": 2.8151982649261367e-05, - "loss": 1.8181, - "step": 15000 + "epoch": 2.18, + "learning_rate": 1.6066611870666003e-05, + "loss": 2.5916, + "step": 73900 }, { - "epoch": 5.23, - "learning_rate": 2.8049678765805954e-05, - "loss": 1.8897, - "step": 15050 + "epoch": 2.18, + "learning_rate": 1.6008728771373337e-05, + "loss": 2.689, + "step": 74000 }, { - "epoch": 5.25, - "learning_rate": 2.7947374882350537e-05, - "loss": 1.8484, - "step": 15100 + "epoch": 2.19, + "learning_rate": 1.5950845672080667e-05, + "loss": 2.6972, + "step": 74100 }, { - "epoch": 5.27, - "learning_rate": 2.7845070998895116e-05, - "loss": 1.8372, - "step": 15150 + "epoch": 2.19, + "learning_rate": 1.5892962572787997e-05, + "loss": 2.6629, + "step": 74200 }, { - "epoch": 5.29, - "learning_rate": 2.7742767115439706e-05, - "loss": 1.9071, - "step": 15200 + "epoch": 2.19, + "learning_rate": 1.583507947349533e-05, + "loss": 2.6508, + "step": 74300 }, { - "epoch": 5.3, - "learning_rate": 2.764046323198429e-05, - "loss": 1.9168, - "step": 15250 + "epoch": 2.2, + "learning_rate": 1.577719637420266e-05, + "loss": 2.7088, + "step": 74400 }, { - "epoch": 5.32, - "learning_rate": 2.7538159348528868e-05, - "loss": 1.9397, - "step": 15300 + "epoch": 2.2, + "learning_rate": 1.571931327490999e-05, + "loss": 2.644, + "step": 74500 }, { - "epoch": 5.34, - "learning_rate": 2.7435855465073458e-05, - "loss": 1.9043, - "step": 15350 + "epoch": 2.2, + "learning_rate": 1.5661430175617324e-05, + "loss": 2.6248, + "step": 74600 }, { - "epoch": 5.36, - "learning_rate": 2.7333551581618038e-05, - "loss": 1.8516, - "step": 15400 + "epoch": 2.21, + "learning_rate": 1.5603547076324657e-05, + "loss": 2.5826, + "step": 74700 }, { - "epoch": 5.37, - "learning_rate": 2.7231247698162627e-05, - "loss": 1.8666, - "step": 15450 + "epoch": 2.21, + "learning_rate": 1.5545663977031988e-05, + "loss": 2.6165, + "step": 74800 }, { - "epoch": 5.39, - "learning_rate": 2.7128943814707207e-05, - "loss": 1.8822, - "step": 15500 + "epoch": 2.21, + "learning_rate": 1.5487780877739318e-05, + "loss": 2.6644, + "step": 74900 }, { - "epoch": 5.41, - "learning_rate": 2.702663993125179e-05, - "loss": 1.9649, - "step": 15550 + "epoch": 2.21, + "learning_rate": 1.542989777844665e-05, + "loss": 2.7307, + "step": 75000 }, { - "epoch": 5.43, - "learning_rate": 2.692433604779638e-05, - "loss": 1.9319, - "step": 15600 + "epoch": 2.22, + "learning_rate": 1.537201467915398e-05, + "loss": 2.664, + "step": 75100 }, { - "epoch": 5.44, - "learning_rate": 2.682203216434096e-05, - "loss": 1.8705, - "step": 15650 + "epoch": 2.22, + "learning_rate": 1.531413157986131e-05, + "loss": 2.6484, + "step": 75200 }, { - "epoch": 5.46, - "learning_rate": 2.6719728280885542e-05, - "loss": 1.8545, - "step": 15700 + "epoch": 2.22, + "learning_rate": 1.5256248480568645e-05, + "loss": 2.6445, + "step": 75300 }, { - "epoch": 5.48, - "learning_rate": 2.6617424397430128e-05, - "loss": 1.8977, - "step": 15750 + "epoch": 2.23, + "learning_rate": 1.5198365381275975e-05, + "loss": 2.6681, + "step": 75400 }, { - "epoch": 5.5, - "learning_rate": 2.651512051397471e-05, - "loss": 1.9078, - "step": 15800 + "epoch": 2.23, + "learning_rate": 1.5140482281983307e-05, + "loss": 2.6783, + "step": 75500 }, { - "epoch": 5.51, - "learning_rate": 2.6412816630519294e-05, - "loss": 1.907, - "step": 15850 + "epoch": 2.23, + "learning_rate": 1.5082599182690638e-05, + "loss": 2.6748, + "step": 75600 }, { - "epoch": 5.53, - "learning_rate": 2.631051274706388e-05, - "loss": 1.9019, - "step": 15900 + "epoch": 2.23, + "learning_rate": 1.5024716083397969e-05, + "loss": 2.6884, + "step": 75700 }, { - "epoch": 5.55, - "learning_rate": 2.6208208863608463e-05, - "loss": 1.8915, - "step": 15950 + "epoch": 2.24, + "learning_rate": 1.4966832984105302e-05, + "loss": 2.6373, + "step": 75800 }, { - "epoch": 5.57, - "learning_rate": 2.6105904980153046e-05, - "loss": 1.9043, - "step": 16000 + "epoch": 2.24, + "learning_rate": 1.4908949884812634e-05, + "loss": 2.6803, + "step": 75900 }, { - "epoch": 5.58, - "learning_rate": 2.6003601096697633e-05, - "loss": 1.8562, - "step": 16050 + "epoch": 2.24, + "learning_rate": 1.4851066785519966e-05, + "loss": 2.6763, + "step": 76000 }, { - "epoch": 5.6, - "learning_rate": 2.5901297213242215e-05, - "loss": 1.8799, - "step": 16100 + "epoch": 2.25, + "learning_rate": 1.4793183686227296e-05, + "loss": 2.6724, + "step": 76100 }, { - "epoch": 5.62, - "learning_rate": 2.5798993329786802e-05, - "loss": 1.9325, - "step": 16150 + "epoch": 2.25, + "learning_rate": 1.4735300586934628e-05, + "loss": 2.6505, + "step": 76200 }, { - "epoch": 5.63, - "learning_rate": 2.5696689446331385e-05, - "loss": 1.85, - "step": 16200 + "epoch": 2.25, + "learning_rate": 1.467741748764196e-05, + "loss": 2.6467, + "step": 76300 }, { - "epoch": 5.65, - "learning_rate": 2.5594385562875968e-05, - "loss": 1.9304, - "step": 16250 + "epoch": 2.26, + "learning_rate": 1.4620113219342215e-05, + "loss": 2.6609, + "step": 76400 }, { - "epoch": 5.67, - "learning_rate": 2.5492081679420554e-05, - "loss": 1.9084, - "step": 16300 + "epoch": 2.26, + "learning_rate": 1.4562230120049549e-05, + "loss": 2.6014, + "step": 76500 }, { - "epoch": 5.69, - "learning_rate": 2.5389777795965137e-05, - "loss": 1.9046, - "step": 16350 + "epoch": 2.26, + "learning_rate": 1.450434702075688e-05, + "loss": 2.6619, + "step": 76600 }, { - "epoch": 5.7, - "learning_rate": 2.528747391250972e-05, - "loss": 1.9901, - "step": 16400 + "epoch": 2.26, + "learning_rate": 1.4446463921464213e-05, + "loss": 2.6916, + "step": 76700 }, { - "epoch": 5.72, - "learning_rate": 2.5185170029054306e-05, - "loss": 1.9568, - "step": 16450 + "epoch": 2.27, + "learning_rate": 1.4388580822171543e-05, + "loss": 2.7405, + "step": 76800 }, { - "epoch": 5.74, - "learning_rate": 2.5084912223268e-05, - "loss": 1.8857, - "step": 16500 + "epoch": 2.27, + "learning_rate": 1.4330697722878874e-05, + "loss": 2.6346, + "step": 76900 }, { - "epoch": 5.76, - "learning_rate": 2.498260833981258e-05, - "loss": 1.8836, - "step": 16550 + "epoch": 2.27, + "learning_rate": 1.4272814623586206e-05, + "loss": 2.6796, + "step": 77000 }, { - "epoch": 5.77, - "learning_rate": 2.4880304456357165e-05, - "loss": 1.8819, - "step": 16600 + "epoch": 2.28, + "learning_rate": 1.4214931524293536e-05, + "loss": 2.6257, + "step": 77100 }, { - "epoch": 5.79, - "learning_rate": 2.477800057290175e-05, - "loss": 1.926, - "step": 16650 + "epoch": 2.28, + "learning_rate": 1.4157048425000868e-05, + "loss": 2.622, + "step": 77200 }, { - "epoch": 5.81, - "learning_rate": 2.467569668944633e-05, - "loss": 1.8706, - "step": 16700 + "epoch": 2.28, + "learning_rate": 1.40991653257082e-05, + "loss": 2.6802, + "step": 77300 }, { - "epoch": 5.83, - "learning_rate": 2.4573392805990917e-05, - "loss": 1.9216, - "step": 16750 + "epoch": 2.28, + "learning_rate": 1.404128222641553e-05, + "loss": 2.6289, + "step": 77400 }, { - "epoch": 5.84, - "learning_rate": 2.44710889225355e-05, - "loss": 1.9101, - "step": 16800 + "epoch": 2.29, + "learning_rate": 1.3983399127122862e-05, + "loss": 2.6751, + "step": 77500 }, { - "epoch": 5.86, - "learning_rate": 2.4368785039080086e-05, - "loss": 1.9195, - "step": 16850 + "epoch": 2.29, + "learning_rate": 1.3925516027830195e-05, + "loss": 2.6744, + "step": 77600 }, { - "epoch": 5.88, - "learning_rate": 2.426648115562467e-05, - "loss": 1.9217, - "step": 16900 + "epoch": 2.29, + "learning_rate": 1.3867632928537527e-05, + "loss": 2.6766, + "step": 77700 }, { - "epoch": 5.9, - "learning_rate": 2.4164177272169252e-05, - "loss": 1.9095, - "step": 16950 + "epoch": 2.3, + "learning_rate": 1.3809749829244859e-05, + "loss": 2.7145, + "step": 77800 }, { - "epoch": 5.91, - "learning_rate": 2.4061873388713838e-05, - "loss": 1.9033, - "step": 17000 + "epoch": 2.3, + "learning_rate": 1.3751866729952189e-05, + "loss": 2.727, + "step": 77900 }, { - "epoch": 5.93, - "learning_rate": 2.395956950525842e-05, - "loss": 1.9233, - "step": 17050 + "epoch": 2.3, + "learning_rate": 1.369398363065952e-05, + "loss": 2.647, + "step": 78000 }, { - "epoch": 5.95, - "learning_rate": 2.3857265621803004e-05, - "loss": 1.9185, - "step": 17100 + "epoch": 2.31, + "learning_rate": 1.3636100531366853e-05, + "loss": 2.7302, + "step": 78100 }, { - "epoch": 5.97, - "learning_rate": 2.375496173834759e-05, - "loss": 1.9486, - "step": 17150 + "epoch": 2.31, + "learning_rate": 1.3578217432074183e-05, + "loss": 2.7183, + "step": 78200 }, { - "epoch": 5.98, - "learning_rate": 2.3652657854892173e-05, - "loss": 1.9232, - "step": 17200 + "epoch": 2.31, + "learning_rate": 1.3520334332781515e-05, + "loss": 2.5994, + "step": 78300 }, { - "epoch": 6.0, - "learning_rate": 2.3550353971436756e-05, - "loss": 1.8381, - "step": 17250 + "epoch": 2.31, + "learning_rate": 1.3462451233488846e-05, + "loss": 2.632, + "step": 78400 }, { - "epoch": 6.0, - "eval_gen_len": 19.9681, - "eval_loss": 3.3179376125335693, - "eval_rouge1": 16.6764, - "eval_rouge2": 4.4834, - "eval_rougeL": 13.5489, - "eval_rougeLsum": 14.6173, - "eval_runtime": 251.1063, - "eval_samples_per_second": 9.251, - "eval_steps_per_second": 1.159, - "step": 17250 + "epoch": 2.32, + "learning_rate": 1.3404568134196176e-05, + "loss": 2.6969, + "step": 78500 }, { - "epoch": 6.02, - "learning_rate": 2.344805008798134e-05, - "loss": 1.636, - "step": 17300 + "epoch": 2.32, + "learning_rate": 1.3346685034903508e-05, + "loss": 2.6856, + "step": 78600 }, { - "epoch": 6.03, - "learning_rate": 2.3345746204525926e-05, - "loss": 1.6805, - "step": 17350 + "epoch": 2.32, + "learning_rate": 1.3289380766603768e-05, + "loss": 2.6106, + "step": 78700 }, { - "epoch": 6.05, - "learning_rate": 2.324344232107051e-05, - "loss": 1.6332, - "step": 17400 + "epoch": 2.33, + "learning_rate": 1.32314976673111e-05, + "loss": 2.642, + "step": 78800 }, { - "epoch": 6.07, - "learning_rate": 2.314113843761509e-05, - "loss": 1.6295, - "step": 17450 + "epoch": 2.33, + "learning_rate": 1.317361456801843e-05, + "loss": 2.6484, + "step": 78900 }, { - "epoch": 6.09, - "learning_rate": 2.3038834554159678e-05, - "loss": 1.6199, - "step": 17500 + "epoch": 2.33, + "learning_rate": 1.3115731468725761e-05, + "loss": 2.6598, + "step": 79000 }, { - "epoch": 6.1, - "learning_rate": 2.293653067070426e-05, - "loss": 1.6603, - "step": 17550 + "epoch": 2.34, + "learning_rate": 1.3057848369433093e-05, + "loss": 2.5861, + "step": 79100 }, { - "epoch": 6.12, - "learning_rate": 2.2834226787248844e-05, - "loss": 1.6277, - "step": 17600 + "epoch": 2.34, + "learning_rate": 1.2999965270140423e-05, + "loss": 2.6432, + "step": 79200 }, { - "epoch": 6.14, - "learning_rate": 2.273192290379343e-05, - "loss": 1.5951, - "step": 17650 + "epoch": 2.34, + "learning_rate": 1.2942082170847755e-05, + "loss": 2.6069, + "step": 79300 }, { - "epoch": 6.16, - "learning_rate": 2.2629619020338013e-05, - "loss": 1.627, - "step": 17700 + "epoch": 2.34, + "learning_rate": 1.2884199071555089e-05, + "loss": 2.7582, + "step": 79400 }, { - "epoch": 6.17, - "learning_rate": 2.2527315136882596e-05, - "loss": 1.6449, - "step": 17750 + "epoch": 2.35, + "learning_rate": 1.282631597226242e-05, + "loss": 2.7005, + "step": 79500 }, { - "epoch": 6.19, - "learning_rate": 2.2425011253427182e-05, - "loss": 1.685, - "step": 17800 + "epoch": 2.35, + "learning_rate": 1.2768432872969752e-05, + "loss": 2.5888, + "step": 79600 }, { - "epoch": 6.21, - "learning_rate": 2.2322707369971765e-05, - "loss": 1.6253, - "step": 17850 + "epoch": 2.35, + "learning_rate": 1.2711128604670008e-05, + "loss": 2.7044, + "step": 79700 }, { - "epoch": 6.23, - "learning_rate": 2.222040348651635e-05, - "loss": 1.6401, - "step": 17900 + "epoch": 2.36, + "learning_rate": 1.265324550537734e-05, + "loss": 2.6257, + "step": 79800 }, { - "epoch": 6.24, - "learning_rate": 2.211809960306093e-05, - "loss": 1.6131, - "step": 17950 + "epoch": 2.36, + "learning_rate": 1.259536240608467e-05, + "loss": 2.6742, + "step": 79900 }, { - "epoch": 6.26, - "learning_rate": 2.2015795719605517e-05, - "loss": 1.6889, - "step": 18000 + "epoch": 2.36, + "learning_rate": 1.2537479306792002e-05, + "loss": 2.675, + "step": 80000 }, { - "epoch": 6.28, - "learning_rate": 2.1913491836150104e-05, - "loss": 1.6918, - "step": 18050 + "epoch": 2.36, + "learning_rate": 1.2479596207499335e-05, + "loss": 2.6145, + "step": 80100 }, { - "epoch": 6.3, - "learning_rate": 2.1811187952694686e-05, - "loss": 1.6414, - "step": 18100 + "epoch": 2.37, + "learning_rate": 1.2421713108206666e-05, + "loss": 2.68, + "step": 80200 }, { - "epoch": 6.31, - "learning_rate": 2.170888406923927e-05, - "loss": 1.6895, - "step": 18150 + "epoch": 2.37, + "learning_rate": 1.2363830008913999e-05, + "loss": 2.6717, + "step": 80300 }, { - "epoch": 6.33, - "learning_rate": 2.1606580185783852e-05, - "loss": 1.6829, - "step": 18200 + "epoch": 2.37, + "learning_rate": 1.230594690962133e-05, + "loss": 2.6563, + "step": 80400 }, { - "epoch": 6.35, - "learning_rate": 2.150427630232844e-05, - "loss": 1.6401, - "step": 18250 + "epoch": 2.38, + "learning_rate": 1.2248063810328661e-05, + "loss": 2.7272, + "step": 80500 }, { - "epoch": 6.37, - "learning_rate": 2.140197241887302e-05, - "loss": 1.6438, - "step": 18300 + "epoch": 2.38, + "learning_rate": 1.2190180711035993e-05, + "loss": 2.6457, + "step": 80600 }, { - "epoch": 6.38, - "learning_rate": 2.1299668535417604e-05, - "loss": 1.6432, - "step": 18350 + "epoch": 2.38, + "learning_rate": 1.2132297611743323e-05, + "loss": 2.7066, + "step": 80700 }, { - "epoch": 6.4, - "learning_rate": 2.119736465196219e-05, - "loss": 1.6894, - "step": 18400 + "epoch": 2.39, + "learning_rate": 1.2074414512450655e-05, + "loss": 2.6104, + "step": 80800 }, { - "epoch": 6.42, - "learning_rate": 2.1095060768506774e-05, - "loss": 1.6283, - "step": 18450 + "epoch": 2.39, + "learning_rate": 1.2016531413157988e-05, + "loss": 2.6494, + "step": 80900 }, { - "epoch": 6.43, - "learning_rate": 2.0992756885051357e-05, - "loss": 1.6865, - "step": 18500 + "epoch": 2.39, + "learning_rate": 1.1958648313865318e-05, + "loss": 2.656, + "step": 81000 }, { - "epoch": 6.45, - "learning_rate": 2.0890453001595943e-05, - "loss": 1.6672, - "step": 18550 + "epoch": 2.39, + "learning_rate": 1.190076521457265e-05, + "loss": 2.7509, + "step": 81100 }, { - "epoch": 6.47, - "learning_rate": 2.0790195195809632e-05, - "loss": 1.6886, - "step": 18600 + "epoch": 2.4, + "learning_rate": 1.184288211527998e-05, + "loss": 2.6835, + "step": 81200 }, { - "epoch": 6.49, - "learning_rate": 2.068789131235422e-05, - "loss": 1.6576, - "step": 18650 + "epoch": 2.4, + "learning_rate": 1.1784999015987312e-05, + "loss": 2.6426, + "step": 81300 }, { - "epoch": 6.5, - "learning_rate": 2.0585587428898805e-05, - "loss": 1.6552, - "step": 18700 + "epoch": 2.4, + "learning_rate": 1.1727115916694644e-05, + "loss": 2.6237, + "step": 81400 }, { - "epoch": 6.52, - "learning_rate": 2.0483283545443384e-05, - "loss": 1.6455, - "step": 18750 + "epoch": 2.41, + "learning_rate": 1.1669232817401976e-05, + "loss": 2.6958, + "step": 81500 }, { - "epoch": 6.54, - "learning_rate": 2.038097966198797e-05, - "loss": 1.6223, - "step": 18800 + "epoch": 2.41, + "learning_rate": 1.1611349718109307e-05, + "loss": 2.6218, + "step": 81600 }, { - "epoch": 6.56, - "learning_rate": 2.0278675778532554e-05, - "loss": 1.6833, - "step": 18850 + "epoch": 2.41, + "learning_rate": 1.1553466618816639e-05, + "loss": 2.5956, + "step": 81700 }, { - "epoch": 6.57, - "learning_rate": 2.0176371895077137e-05, - "loss": 1.6283, - "step": 18900 + "epoch": 2.41, + "learning_rate": 1.149558351952397e-05, + "loss": 2.6071, + "step": 81800 }, { - "epoch": 6.59, - "learning_rate": 2.0074068011621723e-05, - "loss": 1.6394, - "step": 18950 + "epoch": 2.42, + "learning_rate": 1.1437700420231301e-05, + "loss": 2.6936, + "step": 81900 }, { - "epoch": 6.61, - "learning_rate": 1.9971764128166306e-05, - "loss": 1.7302, - "step": 19000 + "epoch": 2.42, + "learning_rate": 1.1379817320938633e-05, + "loss": 2.7179, + "step": 82000 }, { - "epoch": 6.63, - "learning_rate": 1.9869460244710892e-05, - "loss": 1.6423, - "step": 19050 + "epoch": 2.42, + "learning_rate": 1.1321934221645965e-05, + "loss": 2.7001, + "step": 82100 }, { - "epoch": 6.64, - "learning_rate": 1.9767156361255472e-05, - "loss": 1.6277, - "step": 19100 + "epoch": 2.43, + "learning_rate": 1.1264051122353296e-05, + "loss": 2.5915, + "step": 82200 }, { - "epoch": 6.66, - "learning_rate": 1.9664852477800058e-05, - "loss": 1.6844, - "step": 19150 + "epoch": 2.43, + "learning_rate": 1.1206168023060626e-05, + "loss": 2.6884, + "step": 82300 }, { - "epoch": 6.68, - "learning_rate": 1.9562548594344644e-05, - "loss": 1.6526, - "step": 19200 + "epoch": 2.43, + "learning_rate": 1.1148284923767958e-05, + "loss": 2.647, + "step": 82400 }, { - "epoch": 6.7, - "learning_rate": 1.9460244710889224e-05, - "loss": 1.6732, - "step": 19250 + "epoch": 2.44, + "learning_rate": 1.109040182447529e-05, + "loss": 2.6499, + "step": 82500 }, { - "epoch": 6.71, - "learning_rate": 1.935794082743381e-05, - "loss": 1.6854, - "step": 19300 + "epoch": 2.44, + "learning_rate": 1.1032518725182622e-05, + "loss": 2.654, + "step": 82600 }, { - "epoch": 6.73, - "learning_rate": 1.9255636943978393e-05, - "loss": 1.6902, - "step": 19350 + "epoch": 2.44, + "learning_rate": 1.0974635625889954e-05, + "loss": 2.6469, + "step": 82700 }, { - "epoch": 6.75, - "learning_rate": 1.915333306052298e-05, - "loss": 1.6522, - "step": 19400 + "epoch": 2.44, + "learning_rate": 1.0916752526597285e-05, + "loss": 2.6167, + "step": 82800 }, { - "epoch": 6.77, - "learning_rate": 1.9051029177067562e-05, - "loss": 1.7403, - "step": 19450 + "epoch": 2.45, + "learning_rate": 1.0858869427304616e-05, + "loss": 2.6403, + "step": 82900 }, { - "epoch": 6.78, - "learning_rate": 1.8948725293612145e-05, - "loss": 1.6806, - "step": 19500 + "epoch": 2.45, + "learning_rate": 1.0800986328011947e-05, + "loss": 2.632, + "step": 83000 }, { - "epoch": 6.8, - "learning_rate": 1.884642141015673e-05, - "loss": 1.6889, - "step": 19550 + "epoch": 2.45, + "learning_rate": 1.074310322871928e-05, + "loss": 2.6953, + "step": 83100 }, { - "epoch": 6.82, - "learning_rate": 1.8744117526701315e-05, - "loss": 1.6665, - "step": 19600 + "epoch": 2.46, + "learning_rate": 1.0685220129426611e-05, + "loss": 2.7353, + "step": 83200 }, { - "epoch": 6.83, - "learning_rate": 1.8641813643245897e-05, - "loss": 1.6874, - "step": 19650 + "epoch": 2.46, + "learning_rate": 1.0627337030133943e-05, + "loss": 2.6768, + "step": 83300 }, { - "epoch": 6.85, - "learning_rate": 1.8539509759790484e-05, - "loss": 1.6686, - "step": 19700 + "epoch": 2.46, + "learning_rate": 1.0569453930841273e-05, + "loss": 2.6685, + "step": 83400 }, { - "epoch": 6.87, - "learning_rate": 1.8437205876335067e-05, - "loss": 1.6768, - "step": 19750 + "epoch": 2.46, + "learning_rate": 1.0511570831548605e-05, + "loss": 2.7646, + "step": 83500 }, { - "epoch": 6.89, - "learning_rate": 1.833490199287965e-05, - "loss": 1.6844, - "step": 19800 + "epoch": 2.47, + "learning_rate": 1.0453687732255936e-05, + "loss": 2.5976, + "step": 83600 }, { - "epoch": 6.9, - "learning_rate": 1.8232598109424236e-05, - "loss": 1.6395, - "step": 19850 + "epoch": 2.47, + "learning_rate": 1.0395804632963268e-05, + "loss": 2.6466, + "step": 83700 }, { - "epoch": 6.92, - "learning_rate": 1.813029422596882e-05, - "loss": 1.6785, - "step": 19900 + "epoch": 2.47, + "learning_rate": 1.03379215336706e-05, + "loss": 2.7244, + "step": 83800 }, { - "epoch": 6.94, - "learning_rate": 1.8027990342513405e-05, - "loss": 1.705, - "step": 19950 + "epoch": 2.48, + "learning_rate": 1.0280038434377932e-05, + "loss": 2.6485, + "step": 83900 }, { - "epoch": 6.96, - "learning_rate": 1.7925686459057985e-05, - "loss": 1.6897, - "step": 20000 + "epoch": 2.48, + "learning_rate": 1.0222155335085262e-05, + "loss": 2.6187, + "step": 84000 }, { - "epoch": 6.97, - "learning_rate": 1.782338257560257e-05, - "loss": 1.6934, - "step": 20050 + "epoch": 2.48, + "learning_rate": 1.016485106678552e-05, + "loss": 2.5851, + "step": 84100 }, { - "epoch": 6.99, - "learning_rate": 1.7721078692147154e-05, - "loss": 1.6203, - "step": 20100 + "epoch": 2.49, + "learning_rate": 1.0106967967492852e-05, + "loss": 2.5945, + "step": 84200 }, { - "epoch": 7.0, - "eval_gen_len": 19.9105, - "eval_loss": 3.476348638534546, - "eval_rouge1": 17.0434, - "eval_rouge2": 4.5045, - "eval_rougeL": 13.8329, - "eval_rougeLsum": 14.9286, - "eval_runtime": 251.3557, - "eval_samples_per_second": 9.242, - "eval_steps_per_second": 1.158, - "step": 20125 + "epoch": 2.49, + "learning_rate": 1.0049084868200183e-05, + "loss": 2.6874, + "step": 84300 }, { - "epoch": 7.01, - "learning_rate": 1.7618774808691737e-05, - "loss": 1.5313, - "step": 20150 + "epoch": 2.49, + "learning_rate": 9.991780599900441e-06, + "loss": 2.6016, + "step": 84400 }, { - "epoch": 7.03, - "learning_rate": 1.7516470925236323e-05, - "loss": 1.4404, - "step": 20200 + "epoch": 2.49, + "learning_rate": 9.933897500607773e-06, + "loss": 2.7371, + "step": 84500 }, { - "epoch": 7.04, - "learning_rate": 1.7414167041780906e-05, - "loss": 1.4907, - "step": 20250 + "epoch": 2.5, + "learning_rate": 9.876014401315105e-06, + "loss": 2.6355, + "step": 84600 }, { - "epoch": 7.06, - "learning_rate": 1.7311863158325493e-05, - "loss": 1.4881, - "step": 20300 + "epoch": 2.5, + "learning_rate": 9.818131302022437e-06, + "loss": 2.6629, + "step": 84700 }, { - "epoch": 7.08, - "learning_rate": 1.7209559274870075e-05, - "loss": 1.4657, - "step": 20350 + "epoch": 2.5, + "learning_rate": 9.760248202729767e-06, + "loss": 2.6944, + "step": 84800 }, { - "epoch": 7.1, - "learning_rate": 1.710725539141466e-05, - "loss": 1.4654, - "step": 20400 + "epoch": 2.51, + "learning_rate": 9.702365103437098e-06, + "loss": 2.6812, + "step": 84900 }, { - "epoch": 7.11, - "learning_rate": 1.7004951507959245e-05, - "loss": 1.465, - "step": 20450 + "epoch": 2.51, + "learning_rate": 9.64448200414443e-06, + "loss": 2.6876, + "step": 85000 }, { - "epoch": 7.13, - "learning_rate": 1.6902647624503824e-05, - "loss": 1.4669, - "step": 20500 + "epoch": 2.51, + "learning_rate": 9.586598904851762e-06, + "loss": 2.6602, + "step": 85100 }, { - "epoch": 7.15, - "learning_rate": 1.680034374104841e-05, - "loss": 1.4409, - "step": 20550 + "epoch": 2.52, + "learning_rate": 9.528715805559094e-06, + "loss": 2.6384, + "step": 85200 }, { - "epoch": 7.17, - "learning_rate": 1.6698039857592997e-05, - "loss": 1.4553, - "step": 20600 + "epoch": 2.52, + "learning_rate": 9.470832706266426e-06, + "loss": 2.6552, + "step": 85300 }, { - "epoch": 7.18, - "learning_rate": 1.659573597413758e-05, - "loss": 1.4388, - "step": 20650 + "epoch": 2.52, + "learning_rate": 9.412949606973756e-06, + "loss": 2.6656, + "step": 85400 }, { - "epoch": 7.2, - "learning_rate": 1.6493432090682163e-05, - "loss": 1.4827, - "step": 20700 + "epoch": 2.52, + "learning_rate": 9.355066507681088e-06, + "loss": 2.6523, + "step": 85500 }, { - "epoch": 7.22, - "learning_rate": 1.6391128207226746e-05, - "loss": 1.4422, - "step": 20750 + "epoch": 2.53, + "learning_rate": 9.29718340838842e-06, + "loss": 2.741, + "step": 85600 }, { - "epoch": 7.23, - "learning_rate": 1.629087040144044e-05, - "loss": 1.5225, - "step": 20800 + "epoch": 2.53, + "learning_rate": 9.239300309095751e-06, + "loss": 2.6532, + "step": 85700 }, { - "epoch": 7.25, - "learning_rate": 1.6188566517985025e-05, - "loss": 1.4795, - "step": 20850 + "epoch": 2.53, + "learning_rate": 9.181417209803083e-06, + "loss": 2.6828, + "step": 85800 }, { - "epoch": 7.27, - "learning_rate": 1.6086262634529608e-05, - "loss": 1.4375, - "step": 20900 + "epoch": 2.54, + "learning_rate": 9.123534110510413e-06, + "loss": 2.7142, + "step": 85900 }, { - "epoch": 7.29, - "learning_rate": 1.598395875107419e-05, - "loss": 1.4506, - "step": 20950 + "epoch": 2.54, + "learning_rate": 9.065651011217745e-06, + "loss": 2.6404, + "step": 86000 }, { - "epoch": 7.3, - "learning_rate": 1.5881654867618777e-05, - "loss": 1.4734, - "step": 21000 + "epoch": 2.54, + "learning_rate": 9.007767911925077e-06, + "loss": 2.661, + "step": 86100 }, { - "epoch": 7.32, - "learning_rate": 1.577935098416336e-05, - "loss": 1.483, - "step": 21050 + "epoch": 2.54, + "learning_rate": 8.949884812632408e-06, + "loss": 2.676, + "step": 86200 }, { - "epoch": 7.34, - "learning_rate": 1.5677047100707943e-05, - "loss": 1.4546, - "step": 21100 + "epoch": 2.55, + "learning_rate": 8.89200171333974e-06, + "loss": 2.6487, + "step": 86300 }, { - "epoch": 7.36, - "learning_rate": 1.5574743217252526e-05, - "loss": 1.4732, - "step": 21150 + "epoch": 2.55, + "learning_rate": 8.83411861404707e-06, + "loss": 2.6235, + "step": 86400 }, { - "epoch": 7.37, - "learning_rate": 1.5472439333797112e-05, - "loss": 1.4479, - "step": 21200 + "epoch": 2.55, + "learning_rate": 8.776235514754402e-06, + "loss": 2.6581, + "step": 86500 }, { - "epoch": 7.39, - "learning_rate": 1.5370135450341698e-05, - "loss": 1.4968, - "step": 21250 + "epoch": 2.56, + "learning_rate": 8.718352415461734e-06, + "loss": 2.6467, + "step": 86600 }, { - "epoch": 7.41, - "learning_rate": 1.5267831566886278e-05, - "loss": 1.5115, - "step": 21300 + "epoch": 2.56, + "learning_rate": 8.660469316169064e-06, + "loss": 2.6874, + "step": 86700 }, { - "epoch": 7.43, - "learning_rate": 1.5165527683430864e-05, - "loss": 1.4405, - "step": 21350 + "epoch": 2.56, + "learning_rate": 8.602586216876397e-06, + "loss": 2.5841, + "step": 86800 }, { - "epoch": 7.44, - "learning_rate": 1.5063223799975449e-05, - "loss": 1.4919, - "step": 21400 + "epoch": 2.57, + "learning_rate": 8.54470311758373e-06, + "loss": 2.6598, + "step": 86900 }, { - "epoch": 7.46, - "learning_rate": 1.4960919916520033e-05, - "loss": 1.4872, - "step": 21450 + "epoch": 2.57, + "learning_rate": 8.48682001829106e-06, + "loss": 2.6999, + "step": 87000 }, { - "epoch": 7.48, - "learning_rate": 1.4858616033064615e-05, - "loss": 1.4497, - "step": 21500 + "epoch": 2.57, + "learning_rate": 8.428936918998391e-06, + "loss": 2.6375, + "step": 87100 }, { - "epoch": 7.5, - "learning_rate": 1.47563121496092e-05, - "loss": 1.5039, - "step": 21550 + "epoch": 2.57, + "learning_rate": 8.371053819705723e-06, + "loss": 2.6541, + "step": 87200 }, { - "epoch": 7.51, - "learning_rate": 1.4654008266153786e-05, - "loss": 1.4613, - "step": 21600 + "epoch": 2.58, + "learning_rate": 8.313170720413053e-06, + "loss": 2.6914, + "step": 87300 }, { - "epoch": 7.53, - "learning_rate": 1.4551704382698367e-05, - "loss": 1.441, - "step": 21650 + "epoch": 2.58, + "learning_rate": 8.255287621120387e-06, + "loss": 2.6972, + "step": 87400 }, { - "epoch": 7.55, - "learning_rate": 1.4449400499242951e-05, - "loss": 1.4831, - "step": 21700 + "epoch": 2.58, + "learning_rate": 8.197404521827717e-06, + "loss": 2.6217, + "step": 87500 }, { - "epoch": 7.57, - "learning_rate": 1.4347096615787536e-05, - "loss": 1.4237, - "step": 21750 + "epoch": 2.59, + "learning_rate": 8.139521422535048e-06, + "loss": 2.6914, + "step": 87600 }, { - "epoch": 7.58, - "learning_rate": 1.424479273233212e-05, - "loss": 1.4776, - "step": 21800 + "epoch": 2.59, + "learning_rate": 8.08163832324238e-06, + "loss": 2.6, + "step": 87700 }, { - "epoch": 7.6, - "learning_rate": 1.4142488848876704e-05, - "loss": 1.4167, - "step": 21850 + "epoch": 2.59, + "learning_rate": 8.02375522394971e-06, + "loss": 2.6203, + "step": 87800 }, { - "epoch": 7.62, - "learning_rate": 1.4040184965421288e-05, - "loss": 1.4623, - "step": 21900 + "epoch": 2.59, + "learning_rate": 7.965872124657044e-06, + "loss": 2.6175, + "step": 87900 }, { - "epoch": 7.63, - "learning_rate": 1.3937881081965873e-05, - "loss": 1.4778, - "step": 21950 + "epoch": 2.6, + "learning_rate": 7.907989025364376e-06, + "loss": 2.6322, + "step": 88000 }, { - "epoch": 7.65, - "learning_rate": 1.3835577198510454e-05, - "loss": 1.4511, - "step": 22000 + "epoch": 2.6, + "learning_rate": 7.850105926071706e-06, + "loss": 2.6456, + "step": 88100 }, { - "epoch": 7.67, - "learning_rate": 1.373327331505504e-05, - "loss": 1.465, - "step": 22050 + "epoch": 2.6, + "learning_rate": 7.792222826779038e-06, + "loss": 2.6561, + "step": 88200 }, { - "epoch": 7.69, - "learning_rate": 1.3630969431599625e-05, - "loss": 1.4614, - "step": 22100 + "epoch": 2.61, + "learning_rate": 7.73433972748637e-06, + "loss": 2.5974, + "step": 88300 }, { - "epoch": 7.7, - "learning_rate": 1.352866554814421e-05, - "loss": 1.4325, - "step": 22150 + "epoch": 2.61, + "learning_rate": 7.6764566281937e-06, + "loss": 2.6547, + "step": 88400 }, { - "epoch": 7.72, - "learning_rate": 1.342636166468879e-05, - "loss": 1.4482, - "step": 22200 + "epoch": 2.61, + "learning_rate": 7.618573528901032e-06, + "loss": 2.6182, + "step": 88500 }, { - "epoch": 7.74, - "learning_rate": 1.3324057781233375e-05, - "loss": 1.5127, - "step": 22250 + "epoch": 2.62, + "learning_rate": 7.560690429608364e-06, + "loss": 2.5736, + "step": 88600 }, { - "epoch": 7.76, - "learning_rate": 1.322175389777796e-05, - "loss": 1.4358, - "step": 22300 + "epoch": 2.62, + "learning_rate": 7.502807330315695e-06, + "loss": 2.6473, + "step": 88700 }, { - "epoch": 7.77, - "learning_rate": 1.3119450014322543e-05, - "loss": 1.457, - "step": 22350 + "epoch": 2.62, + "learning_rate": 7.445503062015953e-06, + "loss": 2.6361, + "step": 88800 }, { - "epoch": 7.79, - "learning_rate": 1.3017146130867128e-05, - "loss": 1.5029, - "step": 22400 + "epoch": 2.62, + "learning_rate": 7.387619962723284e-06, + "loss": 2.6453, + "step": 88900 }, { - "epoch": 7.81, - "learning_rate": 1.2914842247411712e-05, - "loss": 1.4424, - "step": 22450 + "epoch": 2.63, + "learning_rate": 7.329736863430615e-06, + "loss": 2.6865, + "step": 89000 }, { - "epoch": 7.83, - "learning_rate": 1.2812538363956297e-05, - "loss": 1.5116, - "step": 22500 + "epoch": 2.63, + "learning_rate": 7.271853764137946e-06, + "loss": 2.5792, + "step": 89100 }, { - "epoch": 7.84, - "learning_rate": 1.271023448050088e-05, - "loss": 1.4955, - "step": 22550 + "epoch": 2.63, + "learning_rate": 7.213970664845279e-06, + "loss": 2.6216, + "step": 89200 }, { - "epoch": 7.86, - "learning_rate": 1.2607930597045464e-05, - "loss": 1.5251, - "step": 22600 + "epoch": 2.64, + "learning_rate": 7.156666396545538e-06, + "loss": 2.6165, + "step": 89300 }, { - "epoch": 7.88, - "learning_rate": 1.2505626713590049e-05, - "loss": 1.5159, - "step": 22650 + "epoch": 2.64, + "learning_rate": 7.0987832972528685e-06, + "loss": 2.6615, + "step": 89400 }, { - "epoch": 7.9, - "learning_rate": 1.2403322830134632e-05, - "loss": 1.4945, - "step": 22700 + "epoch": 2.64, + "learning_rate": 7.0409001979601995e-06, + "loss": 2.6344, + "step": 89500 }, { - "epoch": 7.91, - "learning_rate": 1.2301018946679217e-05, - "loss": 1.4622, - "step": 22750 + "epoch": 2.65, + "learning_rate": 6.983017098667531e-06, + "loss": 2.6097, + "step": 89600 }, { - "epoch": 7.93, - "learning_rate": 1.2198715063223801e-05, - "loss": 1.4704, - "step": 22800 + "epoch": 2.65, + "learning_rate": 6.925133999374862e-06, + "loss": 2.6498, + "step": 89700 }, { - "epoch": 7.95, - "learning_rate": 1.2096411179768384e-05, - "loss": 1.548, - "step": 22850 + "epoch": 2.65, + "learning_rate": 6.867250900082195e-06, + "loss": 2.7191, + "step": 89800 }, { - "epoch": 7.97, - "learning_rate": 1.1994107296312969e-05, - "loss": 1.4293, - "step": 22900 + "epoch": 2.65, + "learning_rate": 6.809367800789526e-06, + "loss": 2.6154, + "step": 89900 }, { - "epoch": 7.98, - "learning_rate": 1.1891803412857552e-05, - "loss": 1.4121, - "step": 22950 + "epoch": 2.66, + "learning_rate": 6.751484701496858e-06, + "loss": 2.5856, + "step": 90000 }, { - "epoch": 8.0, - "learning_rate": 1.1791545607071244e-05, - "loss": 1.4982, - "step": 23000 + "epoch": 2.66, + "learning_rate": 6.6936016022041886e-06, + "loss": 2.6058, + "step": 90100 }, { - "epoch": 8.0, - "eval_gen_len": 19.9539, - "eval_loss": 3.6031477451324463, - "eval_rouge1": 17.0044, - "eval_rouge2": 4.7727, - "eval_rougeL": 13.8743, - "eval_rougeLsum": 14.9683, - "eval_runtime": 251.1177, - "eval_samples_per_second": 9.251, - "eval_steps_per_second": 1.159, - "step": 23000 + "epoch": 2.66, + "learning_rate": 6.6357185029115195e-06, + "loss": 2.7033, + "step": 90200 }, { - "epoch": 8.02, - "learning_rate": 1.1689241723615829e-05, - "loss": 1.3034, - "step": 23050 + "epoch": 2.67, + "learning_rate": 6.577835403618851e-06, + "loss": 2.6258, + "step": 90300 }, { - "epoch": 8.03, - "learning_rate": 1.1586937840160412e-05, - "loss": 1.2528, - "step": 23100 + "epoch": 2.67, + "learning_rate": 6.519952304326184e-06, + "loss": 2.6289, + "step": 90400 }, { - "epoch": 8.05, - "learning_rate": 1.1484633956704997e-05, - "loss": 1.3111, - "step": 23150 + "epoch": 2.67, + "learning_rate": 6.462069205033515e-06, + "loss": 2.6527, + "step": 90500 }, { - "epoch": 8.07, - "learning_rate": 1.1382330073249581e-05, - "loss": 1.2992, - "step": 23200 + "epoch": 2.67, + "learning_rate": 6.404186105740846e-06, + "loss": 2.5685, + "step": 90600 }, { - "epoch": 8.09, - "learning_rate": 1.1280026189794166e-05, - "loss": 1.3479, - "step": 23250 + "epoch": 2.68, + "learning_rate": 6.346303006448178e-06, + "loss": 2.6636, + "step": 90700 }, { - "epoch": 8.1, - "learning_rate": 1.1177722306338749e-05, - "loss": 1.2799, - "step": 23300 + "epoch": 2.68, + "learning_rate": 6.288419907155509e-06, + "loss": 2.6443, + "step": 90800 }, { - "epoch": 8.12, - "learning_rate": 1.1075418422883333e-05, - "loss": 1.3115, - "step": 23350 + "epoch": 2.68, + "learning_rate": 6.23053680786284e-06, + "loss": 2.6078, + "step": 90900 }, { - "epoch": 8.14, - "learning_rate": 1.0973114539427918e-05, - "loss": 1.3176, - "step": 23400 + "epoch": 2.69, + "learning_rate": 6.172653708570171e-06, + "loss": 2.6654, + "step": 91000 }, { - "epoch": 8.16, - "learning_rate": 1.0870810655972501e-05, - "loss": 1.325, - "step": 23450 + "epoch": 2.69, + "learning_rate": 6.114770609277504e-06, + "loss": 2.6124, + "step": 91100 }, { - "epoch": 8.17, - "learning_rate": 1.0768506772517086e-05, - "loss": 1.3137, - "step": 23500 + "epoch": 2.69, + "learning_rate": 6.056887509984835e-06, + "loss": 2.6233, + "step": 91200 }, { - "epoch": 8.19, - "learning_rate": 1.0666202889061668e-05, - "loss": 1.3652, - "step": 23550 + "epoch": 2.7, + "learning_rate": 5.999004410692166e-06, + "loss": 2.614, + "step": 91300 }, { - "epoch": 8.21, - "learning_rate": 1.0563899005606253e-05, - "loss": 1.2918, - "step": 23600 + "epoch": 2.7, + "learning_rate": 5.941121311399498e-06, + "loss": 2.6472, + "step": 91400 }, { - "epoch": 8.23, - "learning_rate": 1.0461595122150838e-05, - "loss": 1.3667, - "step": 23650 + "epoch": 2.7, + "learning_rate": 5.8832382121068295e-06, + "loss": 2.6337, + "step": 91500 }, { - "epoch": 8.24, - "learning_rate": 1.0359291238695422e-05, - "loss": 1.3175, - "step": 23700 + "epoch": 2.7, + "learning_rate": 5.82535511281416e-06, + "loss": 2.5766, + "step": 91600 }, { - "epoch": 8.26, - "learning_rate": 1.0256987355240005e-05, - "loss": 1.32, - "step": 23750 + "epoch": 2.71, + "learning_rate": 5.767472013521492e-06, + "loss": 2.6287, + "step": 91700 }, { - "epoch": 8.28, - "learning_rate": 1.015468347178459e-05, - "loss": 1.3254, - "step": 23800 + "epoch": 2.71, + "learning_rate": 5.709588914228824e-06, + "loss": 2.5166, + "step": 91800 }, { - "epoch": 8.3, - "learning_rate": 1.0054425665998283e-05, - "loss": 1.3535, - "step": 23850 + "epoch": 2.71, + "learning_rate": 5.651705814936156e-06, + "loss": 2.6885, + "step": 91900 }, { - "epoch": 8.31, - "learning_rate": 9.952121782542866e-06, - "loss": 1.2927, - "step": 23900 + "epoch": 2.72, + "learning_rate": 5.593822715643487e-06, + "loss": 2.6536, + "step": 92000 }, { - "epoch": 8.33, - "learning_rate": 9.84981789908745e-06, - "loss": 1.3337, - "step": 23950 + "epoch": 2.72, + "learning_rate": 5.535939616350818e-06, + "loss": 2.6554, + "step": 92100 }, { - "epoch": 8.35, - "learning_rate": 9.747514015632033e-06, - "loss": 1.3132, - "step": 24000 + "epoch": 2.72, + "learning_rate": 5.47805651705815e-06, + "loss": 2.5428, + "step": 92200 }, { - "epoch": 8.37, - "learning_rate": 9.645210132176618e-06, - "loss": 1.3396, - "step": 24050 + "epoch": 2.72, + "learning_rate": 5.420173417765481e-06, + "loss": 2.6454, + "step": 92300 }, { - "epoch": 8.38, - "learning_rate": 9.542906248721202e-06, - "loss": 1.3197, - "step": 24100 + "epoch": 2.73, + "learning_rate": 5.362290318472812e-06, + "loss": 2.6419, + "step": 92400 }, { - "epoch": 8.4, - "learning_rate": 9.440602365265785e-06, - "loss": 1.2531, - "step": 24150 + "epoch": 2.73, + "learning_rate": 5.304407219180144e-06, + "loss": 2.6316, + "step": 92500 }, { - "epoch": 8.42, - "learning_rate": 9.33829848181037e-06, - "loss": 1.315, - "step": 24200 + "epoch": 2.73, + "learning_rate": 5.247102950880403e-06, + "loss": 2.6203, + "step": 92600 }, { - "epoch": 8.43, - "learning_rate": 9.235994598354955e-06, - "loss": 1.3129, - "step": 24250 + "epoch": 2.74, + "learning_rate": 5.189219851587734e-06, + "loss": 2.604, + "step": 92700 }, { - "epoch": 8.45, - "learning_rate": 9.133690714899539e-06, - "loss": 1.3048, - "step": 24300 + "epoch": 2.74, + "learning_rate": 5.131336752295065e-06, + "loss": 2.6726, + "step": 92800 }, { - "epoch": 8.47, - "learning_rate": 9.031386831444122e-06, - "loss": 1.3205, - "step": 24350 + "epoch": 2.74, + "learning_rate": 5.073453653002397e-06, + "loss": 2.629, + "step": 92900 }, { - "epoch": 8.49, - "learning_rate": 8.929082947988707e-06, - "loss": 1.3407, - "step": 24400 + "epoch": 2.75, + "learning_rate": 5.015570553709728e-06, + "loss": 2.6562, + "step": 93000 }, { - "epoch": 8.5, - "learning_rate": 8.82677906453329e-06, - "loss": 1.3455, - "step": 24450 + "epoch": 2.75, + "learning_rate": 4.957687454417059e-06, + "loss": 2.637, + "step": 93100 }, { - "epoch": 8.52, - "learning_rate": 8.724475181077874e-06, - "loss": 1.3333, - "step": 24500 + "epoch": 2.75, + "learning_rate": 4.899804355124391e-06, + "loss": 2.6057, + "step": 93200 }, { - "epoch": 8.54, - "learning_rate": 8.622171297622459e-06, - "loss": 1.3236, - "step": 24550 + "epoch": 2.75, + "learning_rate": 4.841921255831723e-06, + "loss": 2.7188, + "step": 93300 }, { - "epoch": 8.56, - "learning_rate": 8.519867414167042e-06, - "loss": 1.3537, - "step": 24600 + "epoch": 2.76, + "learning_rate": 4.784038156539054e-06, + "loss": 2.6679, + "step": 93400 }, { - "epoch": 8.57, - "learning_rate": 8.417563530711626e-06, - "loss": 1.302, - "step": 24650 + "epoch": 2.76, + "learning_rate": 4.7261550572463855e-06, + "loss": 2.658, + "step": 93500 }, { - "epoch": 8.59, - "learning_rate": 8.31525964725621e-06, - "loss": 1.2704, - "step": 24700 + "epoch": 2.76, + "learning_rate": 4.668271957953716e-06, + "loss": 2.7056, + "step": 93600 }, { - "epoch": 8.61, - "learning_rate": 8.212955763800794e-06, - "loss": 1.2953, - "step": 24750 + "epoch": 2.77, + "learning_rate": 4.610388858661048e-06, + "loss": 2.568, + "step": 93700 }, { - "epoch": 8.63, - "learning_rate": 8.110651880345379e-06, - "loss": 1.3311, - "step": 24800 + "epoch": 2.77, + "learning_rate": 4.55250575936838e-06, + "loss": 2.6135, + "step": 93800 }, { - "epoch": 8.64, - "learning_rate": 8.008347996889963e-06, - "loss": 1.2794, - "step": 24850 + "epoch": 2.77, + "learning_rate": 4.494622660075711e-06, + "loss": 2.6412, + "step": 93900 }, { - "epoch": 8.66, - "learning_rate": 7.906044113434546e-06, - "loss": 1.3408, - "step": 24900 + "epoch": 2.77, + "learning_rate": 4.436739560783043e-06, + "loss": 2.6006, + "step": 94000 }, { - "epoch": 8.68, - "learning_rate": 7.803740229979129e-06, - "loss": 1.298, - "step": 24950 + "epoch": 2.78, + "learning_rate": 4.3788564614903745e-06, + "loss": 2.6539, + "step": 94100 }, { - "epoch": 8.7, - "learning_rate": 7.701436346523715e-06, - "loss": 1.2529, - "step": 25000 + "epoch": 2.78, + "learning_rate": 4.3209733621977055e-06, + "loss": 2.5896, + "step": 94200 }, { - "epoch": 8.71, - "learning_rate": 7.599132463068298e-06, - "loss": 1.3221, - "step": 25050 + "epoch": 2.78, + "learning_rate": 4.263090262905037e-06, + "loss": 2.6026, + "step": 94300 }, { - "epoch": 8.73, - "learning_rate": 7.496828579612883e-06, - "loss": 1.3254, - "step": 25100 + "epoch": 2.79, + "learning_rate": 4.205207163612369e-06, + "loss": 2.6195, + "step": 94400 }, { - "epoch": 8.75, - "learning_rate": 7.394524696157467e-06, - "loss": 1.2636, - "step": 25150 + "epoch": 2.79, + "learning_rate": 4.1473240643197e-06, + "loss": 2.6127, + "step": 94500 }, { - "epoch": 8.77, - "learning_rate": 7.292220812702051e-06, - "loss": 1.4183, - "step": 25200 + "epoch": 2.79, + "learning_rate": 4.089440965027032e-06, + "loss": 2.6983, + "step": 94600 }, { - "epoch": 8.78, - "learning_rate": 7.189916929246634e-06, - "loss": 1.3473, - "step": 25250 + "epoch": 2.8, + "learning_rate": 4.031557865734363e-06, + "loss": 2.5699, + "step": 94700 }, { - "epoch": 8.8, - "learning_rate": 7.087613045791218e-06, - "loss": 1.3512, - "step": 25300 + "epoch": 2.8, + "learning_rate": 3.9736747664416946e-06, + "loss": 2.6127, + "step": 94800 }, { - "epoch": 8.82, - "learning_rate": 6.985309162335803e-06, - "loss": 1.3251, - "step": 25350 + "epoch": 2.8, + "learning_rate": 3.915791667149026e-06, + "loss": 2.6966, + "step": 94900 }, { - "epoch": 8.83, - "learning_rate": 6.8830052788803864e-06, - "loss": 1.3251, - "step": 25400 + "epoch": 2.8, + "learning_rate": 3.857908567856357e-06, + "loss": 2.6416, + "step": 95000 }, { - "epoch": 8.85, - "learning_rate": 6.780701395424971e-06, - "loss": 1.2679, - "step": 25450 + "epoch": 2.81, + "learning_rate": 3.8000254685636887e-06, + "loss": 2.6861, + "step": 95100 }, { - "epoch": 8.87, - "learning_rate": 6.678397511969554e-06, - "loss": 1.2999, - "step": 25500 + "epoch": 2.81, + "learning_rate": 3.7421423692710205e-06, + "loss": 2.5394, + "step": 95200 }, { - "epoch": 8.89, - "learning_rate": 6.5760936285141395e-06, - "loss": 1.3313, - "step": 25550 + "epoch": 2.81, + "learning_rate": 3.684259269978352e-06, + "loss": 2.5937, + "step": 95300 }, { - "epoch": 8.9, - "learning_rate": 6.473789745058722e-06, - "loss": 1.2963, - "step": 25600 + "epoch": 2.82, + "learning_rate": 3.626376170685683e-06, + "loss": 2.6151, + "step": 95400 }, { - "epoch": 8.92, - "learning_rate": 6.371485861603307e-06, - "loss": 1.2849, - "step": 25650 + "epoch": 2.82, + "learning_rate": 3.568493071393015e-06, + "loss": 2.6567, + "step": 95500 }, { - "epoch": 8.94, - "learning_rate": 6.269181978147891e-06, - "loss": 1.3351, - "step": 25700 + "epoch": 2.82, + "learning_rate": 3.5106099721003464e-06, + "loss": 2.5833, + "step": 95600 }, { - "epoch": 8.96, - "learning_rate": 6.1668780946924746e-06, - "loss": 1.3182, - "step": 25750 + "epoch": 2.83, + "learning_rate": 3.452726872807678e-06, + "loss": 2.6624, + "step": 95700 }, { - "epoch": 8.97, - "learning_rate": 6.064574211237059e-06, - "loss": 1.3212, - "step": 25800 + "epoch": 2.83, + "learning_rate": 3.394843773515009e-06, + "loss": 2.6202, + "step": 95800 }, { - "epoch": 8.99, - "learning_rate": 5.962270327781643e-06, - "loss": 1.3385, - "step": 25850 + "epoch": 2.83, + "learning_rate": 3.3369606742223405e-06, + "loss": 2.5676, + "step": 95900 }, { - "epoch": 9.0, - "eval_gen_len": 19.8291, - "eval_loss": 3.7051403522491455, - "eval_rouge1": 17.0903, - "eval_rouge2": 4.5413, - "eval_rougeL": 13.8897, - "eval_rougeLsum": 15.0091, - "eval_runtime": 251.3799, - "eval_samples_per_second": 9.241, - "eval_steps_per_second": 1.158, - "step": 25875 + "epoch": 2.83, + "learning_rate": 3.2790775749296723e-06, + "loss": 2.6249, + "step": 96000 }, { - "epoch": 9.01, - "learning_rate": 5.859966444326227e-06, - "loss": 1.2874, - "step": 25900 + "epoch": 2.84, + "learning_rate": 3.2211944756370037e-06, + "loss": 2.6144, + "step": 96100 }, { - "epoch": 9.03, - "learning_rate": 5.7576625608708105e-06, - "loss": 1.2224, - "step": 25950 + "epoch": 2.84, + "learning_rate": 3.163311376344335e-06, + "loss": 2.6163, + "step": 96200 }, { - "epoch": 9.04, - "learning_rate": 5.655358677415395e-06, - "loss": 1.1688, - "step": 26000 + "epoch": 2.84, + "learning_rate": 3.1054282770516664e-06, + "loss": 2.5738, + "step": 96300 }, { - "epoch": 9.06, - "learning_rate": 5.553054793959979e-06, - "loss": 1.1576, - "step": 26050 + "epoch": 2.85, + "learning_rate": 3.047545177758998e-06, + "loss": 2.6832, + "step": 96400 }, { - "epoch": 9.08, - "learning_rate": 5.450750910504563e-06, - "loss": 1.2135, - "step": 26100 + "epoch": 2.85, + "learning_rate": 2.9896620784663296e-06, + "loss": 2.5983, + "step": 96500 }, { - "epoch": 9.1, - "learning_rate": 5.348447027049147e-06, - "loss": 1.2315, - "step": 26150 + "epoch": 2.85, + "learning_rate": 2.932357810166588e-06, + "loss": 2.6802, + "step": 96600 }, { - "epoch": 9.11, - "learning_rate": 5.246143143593731e-06, - "loss": 1.1559, - "step": 26200 + "epoch": 2.85, + "learning_rate": 2.875053541866846e-06, + "loss": 2.6392, + "step": 96700 }, { - "epoch": 9.13, - "learning_rate": 5.143839260138316e-06, - "loss": 1.2484, - "step": 26250 + "epoch": 2.86, + "learning_rate": 2.817170442574177e-06, + "loss": 2.627, + "step": 96800 }, { - "epoch": 9.15, - "learning_rate": 5.0415353766828995e-06, - "loss": 1.2101, - "step": 26300 + "epoch": 2.86, + "learning_rate": 2.7592873432815088e-06, + "loss": 2.6258, + "step": 96900 }, { - "epoch": 9.17, - "learning_rate": 4.939231493227483e-06, - "loss": 1.2117, - "step": 26350 + "epoch": 2.86, + "learning_rate": 2.70140424398884e-06, + "loss": 2.5845, + "step": 97000 }, { - "epoch": 9.18, - "learning_rate": 4.836927609772067e-06, - "loss": 1.2629, - "step": 26400 + "epoch": 2.87, + "learning_rate": 2.643521144696172e-06, + "loss": 2.6863, + "step": 97100 }, { - "epoch": 9.2, - "learning_rate": 4.734623726316651e-06, - "loss": 1.2024, - "step": 26450 + "epoch": 2.87, + "learning_rate": 2.5856380454035033e-06, + "loss": 2.5749, + "step": 97200 }, { - "epoch": 9.22, - "learning_rate": 4.6323198428612354e-06, - "loss": 1.1946, - "step": 26500 + "epoch": 2.87, + "learning_rate": 2.5277549461108347e-06, + "loss": 2.5582, + "step": 97300 }, { - "epoch": 9.23, - "learning_rate": 4.530015959405819e-06, - "loss": 1.2109, - "step": 26550 + "epoch": 2.88, + "learning_rate": 2.469871846818166e-06, + "loss": 2.6249, + "step": 97400 }, { - "epoch": 9.25, - "learning_rate": 4.427712075950404e-06, - "loss": 1.2544, - "step": 26600 + "epoch": 2.88, + "learning_rate": 2.4119887475254974e-06, + "loss": 2.6195, + "step": 97500 }, { - "epoch": 9.27, - "learning_rate": 4.325408192494988e-06, - "loss": 1.1986, - "step": 26650 + "epoch": 2.88, + "learning_rate": 2.3541056482328292e-06, + "loss": 2.5851, + "step": 97600 }, { - "epoch": 9.29, - "learning_rate": 4.223104309039571e-06, - "loss": 1.2237, - "step": 26700 + "epoch": 2.88, + "learning_rate": 2.2962225489401606e-06, + "loss": 2.6251, + "step": 97700 }, { - "epoch": 9.3, - "learning_rate": 4.120800425584155e-06, - "loss": 1.1966, - "step": 26750 + "epoch": 2.89, + "learning_rate": 2.238339449647492e-06, + "loss": 2.6062, + "step": 97800 }, { - "epoch": 9.32, - "learning_rate": 4.018496542128739e-06, - "loss": 1.247, - "step": 26800 + "epoch": 2.89, + "learning_rate": 2.1804563503548233e-06, + "loss": 2.6002, + "step": 97900 }, { - "epoch": 9.34, - "learning_rate": 3.9161926586733236e-06, - "loss": 1.2958, - "step": 26850 + "epoch": 2.89, + "learning_rate": 2.122573251062155e-06, + "loss": 2.5926, + "step": 98000 }, { - "epoch": 9.36, - "learning_rate": 3.8138887752179073e-06, - "loss": 1.1975, - "step": 26900 + "epoch": 2.9, + "learning_rate": 2.0646901517694865e-06, + "loss": 2.5646, + "step": 98100 }, { - "epoch": 9.37, - "learning_rate": 3.7115848917624915e-06, - "loss": 1.2126, - "step": 26950 + "epoch": 2.9, + "learning_rate": 2.006807052476818e-06, + "loss": 2.6043, + "step": 98200 }, { - "epoch": 9.39, - "learning_rate": 3.6092810083070757e-06, - "loss": 1.177, - "step": 27000 + "epoch": 2.9, + "learning_rate": 1.9489239531841493e-06, + "loss": 2.6333, + "step": 98300 }, { - "epoch": 9.41, - "learning_rate": 3.50697712485166e-06, - "loss": 1.2372, - "step": 27050 + "epoch": 2.9, + "learning_rate": 1.891040853891481e-06, + "loss": 2.6832, + "step": 98400 }, { - "epoch": 9.43, - "learning_rate": 3.4046732413962437e-06, - "loss": 1.1955, - "step": 27100 + "epoch": 2.91, + "learning_rate": 1.8331577545988122e-06, + "loss": 2.5962, + "step": 98500 }, { - "epoch": 9.44, - "learning_rate": 3.302369357940827e-06, - "loss": 1.226, - "step": 27150 + "epoch": 2.91, + "learning_rate": 1.7752746553061438e-06, + "loss": 2.5622, + "step": 98600 }, { - "epoch": 9.46, - "learning_rate": 3.2000654744854113e-06, - "loss": 1.2704, - "step": 27200 + "epoch": 2.91, + "learning_rate": 1.7173915560134754e-06, + "loss": 2.5946, + "step": 98700 }, { - "epoch": 9.48, - "learning_rate": 3.0977615910299955e-06, - "loss": 1.1625, - "step": 27250 + "epoch": 2.92, + "learning_rate": 1.659508456720807e-06, + "loss": 2.5805, + "step": 98800 }, { - "epoch": 9.5, - "learning_rate": 2.9954577075745797e-06, - "loss": 1.2163, - "step": 27300 + "epoch": 2.92, + "learning_rate": 1.6016253574281381e-06, + "loss": 2.6398, + "step": 98900 }, { - "epoch": 9.51, - "learning_rate": 2.893153824119164e-06, - "loss": 1.1937, - "step": 27350 + "epoch": 2.92, + "learning_rate": 1.5437422581354697e-06, + "loss": 2.5873, + "step": 99000 }, { - "epoch": 9.53, - "learning_rate": 2.7908499406637476e-06, - "loss": 1.1698, - "step": 27400 + "epoch": 2.93, + "learning_rate": 1.485859158842801e-06, + "loss": 2.5955, + "step": 99100 }, { - "epoch": 9.55, - "learning_rate": 2.6885460572083314e-06, - "loss": 1.1571, - "step": 27450 + "epoch": 2.93, + "learning_rate": 1.4279760595501327e-06, + "loss": 2.6957, + "step": 99200 }, { - "epoch": 9.57, - "learning_rate": 2.5862421737529156e-06, - "loss": 1.1881, - "step": 27500 + "epoch": 2.93, + "learning_rate": 1.370092960257464e-06, + "loss": 2.5805, + "step": 99300 }, { - "epoch": 9.58, - "learning_rate": 2.4839382902975e-06, - "loss": 1.25, - "step": 27550 + "epoch": 2.93, + "learning_rate": 1.3122098609647956e-06, + "loss": 2.5943, + "step": 99400 }, { - "epoch": 9.6, - "learning_rate": 2.381634406842084e-06, - "loss": 1.1645, - "step": 27600 + "epoch": 2.94, + "learning_rate": 1.254326761672127e-06, + "loss": 2.652, + "step": 99500 }, { - "epoch": 9.62, - "learning_rate": 2.2793305233866682e-06, - "loss": 1.1695, - "step": 27650 + "epoch": 2.94, + "learning_rate": 1.1964436623794584e-06, + "loss": 2.656, + "step": 99600 }, { - "epoch": 9.63, - "learning_rate": 2.177026639931252e-06, - "loss": 1.1629, - "step": 27700 + "epoch": 2.94, + "learning_rate": 1.13856056308679e-06, + "loss": 2.5938, + "step": 99700 }, { - "epoch": 9.65, - "learning_rate": 2.0747227564758358e-06, - "loss": 1.2161, - "step": 27750 + "epoch": 2.95, + "learning_rate": 1.0806774637941215e-06, + "loss": 2.5805, + "step": 99800 }, { - "epoch": 9.67, - "learning_rate": 1.97241887302042e-06, - "loss": 1.2465, - "step": 27800 + "epoch": 2.95, + "learning_rate": 1.0227943645014529e-06, + "loss": 2.6259, + "step": 99900 }, { - "epoch": 9.69, - "learning_rate": 1.8701149895650042e-06, - "loss": 1.1754, - "step": 27850 + "epoch": 2.95, + "learning_rate": 9.649112652087845e-07, + "loss": 2.6734, + "step": 100000 }, { - "epoch": 9.7, - "learning_rate": 1.767811106109588e-06, - "loss": 1.1823, - "step": 27900 + "epoch": 2.95, + "learning_rate": 9.070281659161157e-07, + "loss": 2.6134, + "step": 100100 }, { - "epoch": 9.72, - "learning_rate": 1.665507222654172e-06, - "loss": 1.2259, - "step": 27950 + "epoch": 2.96, + "learning_rate": 8.491450666234473e-07, + "loss": 2.6146, + "step": 100200 }, { - "epoch": 9.74, - "learning_rate": 1.5632033391987561e-06, - "loss": 1.2193, - "step": 28000 + "epoch": 2.96, + "learning_rate": 7.912619673307788e-07, + "loss": 2.6645, + "step": 100300 }, { - "epoch": 9.76, - "learning_rate": 1.4608994557433401e-06, - "loss": 1.2087, - "step": 28050 + "epoch": 2.96, + "learning_rate": 7.333788680381103e-07, + "loss": 2.6002, + "step": 100400 }, { - "epoch": 9.77, - "learning_rate": 1.3585955722879241e-06, - "loss": 1.2028, - "step": 28100 + "epoch": 2.97, + "learning_rate": 6.754957687454418e-07, + "loss": 2.6512, + "step": 100500 }, { - "epoch": 9.79, - "learning_rate": 1.256291688832508e-06, - "loss": 1.2146, - "step": 28150 + "epoch": 2.97, + "learning_rate": 6.176126694527732e-07, + "loss": 2.6164, + "step": 100600 }, { - "epoch": 9.81, - "learning_rate": 1.1560338830462006e-06, - "loss": 1.226, - "step": 28200 + "epoch": 2.97, + "learning_rate": 5.597295701601047e-07, + "loss": 2.5653, + "step": 100700 }, { - "epoch": 9.83, - "learning_rate": 1.0537299995907844e-06, - "loss": 1.1566, - "step": 28250 + "epoch": 2.98, + "learning_rate": 5.024253018603628e-07, + "loss": 2.5852, + "step": 100800 }, { - "epoch": 9.84, - "learning_rate": 9.514261161353686e-07, - "loss": 1.1818, - "step": 28300 + "epoch": 2.98, + "learning_rate": 4.4454220256769427e-07, + "loss": 2.6347, + "step": 100900 }, { - "epoch": 9.86, - "learning_rate": 8.491222326799525e-07, - "loss": 1.1817, - "step": 28350 + "epoch": 2.98, + "learning_rate": 3.866591032750258e-07, + "loss": 2.589, + "step": 101000 }, { - "epoch": 9.88, - "learning_rate": 7.468183492245366e-07, - "loss": 1.2089, - "step": 28400 + "epoch": 2.98, + "learning_rate": 3.287760039823572e-07, + "loss": 2.5523, + "step": 101100 }, { - "epoch": 9.9, - "learning_rate": 6.445144657691206e-07, - "loss": 1.199, - "step": 28450 + "epoch": 2.99, + "learning_rate": 2.708929046896887e-07, + "loss": 2.6118, + "step": 101200 }, { - "epoch": 9.91, - "learning_rate": 5.422105823137046e-07, - "loss": 1.2147, - "step": 28500 + "epoch": 2.99, + "learning_rate": 2.130098053970202e-07, + "loss": 2.6409, + "step": 101300 }, { - "epoch": 9.93, - "learning_rate": 4.399066988582887e-07, - "loss": 1.2246, - "step": 28550 + "epoch": 2.99, + "learning_rate": 1.5570553709727834e-07, + "loss": 2.5427, + "step": 101400 }, { - "epoch": 9.95, - "learning_rate": 3.376028154028727e-07, - "loss": 1.1782, - "step": 28600 + "epoch": 3.0, + "learning_rate": 9.782243780460982e-08, + "loss": 2.6227, + "step": 101500 }, { - "epoch": 9.97, - "learning_rate": 2.3529893194745673e-07, - "loss": 1.2261, - "step": 28650 + "epoch": 3.0, + "learning_rate": 3.993933851194129e-08, + "loss": 2.5889, + "step": 101600 }, { - "epoch": 9.98, - "learning_rate": 1.3299504849204075e-07, - "loss": 1.1608, - "step": 28700 + "epoch": 3.0, + "eval_gen_len": 19.8855, + "eval_loss": 3.09702730178833, + "eval_rouge1": 19.1763, + "eval_rouge2": 5.4517, + "eval_rougeL": 15.5342, + "eval_rougeLsum": 16.7186, + "eval_runtime": 748.2031, + "eval_samples_per_second": 10.06, + "eval_steps_per_second": 2.515, + "step": 101625 }, { - "epoch": 10.0, - "learning_rate": 3.069116503662479e-08, - "loss": 1.2211, - "step": 28750 - }, - { - "epoch": 10.0, - "eval_gen_len": 19.9066, - "eval_loss": 3.790048360824585, - "eval_rouge1": 16.7843, - "eval_rouge2": 4.4907, - "eval_rougeL": 13.6418, - "eval_rougeLsum": 14.7366, - "eval_runtime": 251.7779, - "eval_samples_per_second": 9.226, - "eval_steps_per_second": 1.156, - "step": 28750 - }, - { - "epoch": 10.0, - "step": 28750, - "total_flos": 5.4143051427446784e+17, - "train_loss": 2.145213280321204, - "train_runtime": 23488.7209, - "train_samples_per_second": 9.792, - "train_steps_per_second": 1.224 + "epoch": 3.0, + "step": 101625, + "total_flos": 7.175645735046021e+17, + "train_loss": 3.0545432459982558, + "train_runtime": 34500.7445, + "train_samples_per_second": 11.782, + "train_steps_per_second": 2.946 } ], - "max_steps": 28750, - "num_train_epochs": 10, - "total_flos": 5.4143051427446784e+17, + "max_steps": 101625, + "num_train_epochs": 3, + "total_flos": 7.175645735046021e+17, "trial_name": null, "trial_params": null }