{ "best_metric": null, "best_model_checkpoint": null, "epoch": 7.0, "global_step": 89859, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 0.0002983307181250626, "loss": 3.5314, "step": 500 }, { "epoch": 0.08, "learning_rate": 0.00029666143625012515, "loss": 3.2698, "step": 1000 }, { "epoch": 0.12, "learning_rate": 0.00029499215437518776, "loss": 3.1741, "step": 1500 }, { "epoch": 0.16, "learning_rate": 0.0002933228725002504, "loss": 3.1591, "step": 2000 }, { "epoch": 0.19, "learning_rate": 0.000291653590625313, "loss": 3.0795, "step": 2500 }, { "epoch": 0.23, "learning_rate": 0.00028998430875037555, "loss": 3.0526, "step": 3000 }, { "epoch": 0.27, "learning_rate": 0.00028831502687543816, "loss": 3.0228, "step": 3500 }, { "epoch": 0.31, "learning_rate": 0.0002866457450005008, "loss": 2.9966, "step": 4000 }, { "epoch": 0.35, "learning_rate": 0.0002849764631255634, "loss": 2.9799, "step": 4500 }, { "epoch": 0.39, "learning_rate": 0.00028330718125062595, "loss": 2.9632, "step": 5000 }, { "epoch": 0.39, "eval_gen_len": 18.7655, "eval_loss": 2.559084892272949, "eval_rouge1": 25.8521, "eval_rouge2": 11.6024, "eval_rougeL": 21.0887, "eval_rougeLsum": 22.857, "eval_runtime": 625.5093, "eval_samples_per_second": 20.521, "eval_steps_per_second": 2.566, "step": 5000 }, { "epoch": 0.43, "learning_rate": 0.00028163789937568856, "loss": 2.9402, "step": 5500 }, { "epoch": 0.47, "learning_rate": 0.0002799686175007512, "loss": 2.9157, "step": 6000 }, { "epoch": 0.51, "learning_rate": 0.00027829933562581373, "loss": 2.9143, "step": 6500 }, { "epoch": 0.55, "learning_rate": 0.00027663005375087635, "loss": 2.8869, "step": 7000 }, { "epoch": 0.58, "learning_rate": 0.0002749607718759389, "loss": 2.9195, "step": 7500 }, { "epoch": 0.62, "learning_rate": 0.0002732914900010015, "loss": 2.8558, "step": 8000 }, { "epoch": 0.66, "learning_rate": 0.00027162220812606414, "loss": 2.8594, "step": 8500 }, { "epoch": 0.7, "learning_rate": 0.00026995292625112675, "loss": 2.8485, "step": 9000 }, { "epoch": 0.74, "learning_rate": 0.0002682836443761893, "loss": 2.8491, "step": 9500 }, { "epoch": 0.78, "learning_rate": 0.0002666143625012519, "loss": 2.8327, "step": 10000 }, { "epoch": 0.78, "eval_gen_len": 18.8331, "eval_loss": 2.4890213012695312, "eval_rouge1": 26.508, "eval_rouge2": 12.2564, "eval_rougeL": 21.7467, "eval_rougeLsum": 23.5474, "eval_runtime": 624.7506, "eval_samples_per_second": 20.546, "eval_steps_per_second": 2.569, "step": 10000 }, { "epoch": 0.82, "learning_rate": 0.00026494508062631454, "loss": 2.8157, "step": 10500 }, { "epoch": 0.86, "learning_rate": 0.00026327579875137715, "loss": 2.8001, "step": 11000 }, { "epoch": 0.9, "learning_rate": 0.0002616065168764397, "loss": 2.84, "step": 11500 }, { "epoch": 0.93, "learning_rate": 0.0002599372350015023, "loss": 2.8051, "step": 12000 }, { "epoch": 0.97, "learning_rate": 0.00025826795312656494, "loss": 2.8138, "step": 12500 }, { "epoch": 1.01, "learning_rate": 0.00025659867125162755, "loss": 2.7677, "step": 13000 }, { "epoch": 1.05, "learning_rate": 0.0002549293893766901, "loss": 2.6905, "step": 13500 }, { "epoch": 1.09, "learning_rate": 0.0002532601075017527, "loss": 2.709, "step": 14000 }, { "epoch": 1.13, "learning_rate": 0.00025159082562681534, "loss": 2.7136, "step": 14500 }, { "epoch": 1.17, "learning_rate": 0.00024992154375187795, "loss": 2.6873, "step": 15000 }, { "epoch": 1.17, "eval_gen_len": 18.8424, "eval_loss": 2.4520416259765625, "eval_rouge1": 26.8614, "eval_rouge2": 12.7032, "eval_rougeL": 22.0965, "eval_rougeLsum": 23.832, "eval_runtime": 623.6597, "eval_samples_per_second": 20.582, "eval_steps_per_second": 2.574, "step": 15000 }, { "epoch": 1.21, "learning_rate": 0.0002482522618769405, "loss": 2.6962, "step": 15500 }, { "epoch": 1.25, "learning_rate": 0.0002465829800020031, "loss": 2.6779, "step": 16000 }, { "epoch": 1.29, "learning_rate": 0.00024491369812706574, "loss": 2.6964, "step": 16500 }, { "epoch": 1.32, "learning_rate": 0.00024324441625212832, "loss": 2.7048, "step": 17000 }, { "epoch": 1.36, "learning_rate": 0.00024157513437719094, "loss": 2.6687, "step": 17500 }, { "epoch": 1.4, "learning_rate": 0.00023990585250225352, "loss": 2.6869, "step": 18000 }, { "epoch": 1.44, "learning_rate": 0.00023823657062731608, "loss": 2.678, "step": 18500 }, { "epoch": 1.48, "learning_rate": 0.0002365672887523787, "loss": 2.6544, "step": 19000 }, { "epoch": 1.52, "learning_rate": 0.00023489800687744128, "loss": 2.6591, "step": 19500 }, { "epoch": 1.56, "learning_rate": 0.0002332287250025039, "loss": 2.6572, "step": 20000 }, { "epoch": 1.56, "eval_gen_len": 18.727, "eval_loss": 2.4031243324279785, "eval_rouge1": 27.0114, "eval_rouge2": 12.8148, "eval_rougeL": 22.2407, "eval_rougeLsum": 24.0602, "eval_runtime": 626.2755, "eval_samples_per_second": 20.496, "eval_steps_per_second": 2.563, "step": 20000 }, { "epoch": 1.6, "learning_rate": 0.00023155944312756648, "loss": 2.628, "step": 20500 }, { "epoch": 1.64, "learning_rate": 0.0002298901612526291, "loss": 2.6544, "step": 21000 }, { "epoch": 1.67, "learning_rate": 0.00022822087937769168, "loss": 2.6792, "step": 21500 }, { "epoch": 1.71, "learning_rate": 0.0002265515975027543, "loss": 2.6493, "step": 22000 }, { "epoch": 1.75, "learning_rate": 0.00022488231562781688, "loss": 2.6385, "step": 22500 }, { "epoch": 1.79, "learning_rate": 0.0002232130337528795, "loss": 2.6368, "step": 23000 }, { "epoch": 1.83, "learning_rate": 0.00022154375187794208, "loss": 2.6319, "step": 23500 }, { "epoch": 1.87, "learning_rate": 0.0002198744700030047, "loss": 2.6764, "step": 24000 }, { "epoch": 1.91, "learning_rate": 0.00021820518812806729, "loss": 2.6729, "step": 24500 }, { "epoch": 1.95, "learning_rate": 0.0002165359062531299, "loss": 2.6461, "step": 25000 }, { "epoch": 1.95, "eval_gen_len": 18.7416, "eval_loss": 2.391615629196167, "eval_rouge1": 27.2287, "eval_rouge2": 12.9935, "eval_rougeL": 22.4718, "eval_rougeLsum": 24.2517, "eval_runtime": 622.8089, "eval_samples_per_second": 20.61, "eval_steps_per_second": 2.577, "step": 25000 }, { "epoch": 1.99, "learning_rate": 0.00021486662437819249, "loss": 2.6244, "step": 25500 }, { "epoch": 2.03, "learning_rate": 0.0002131973425032551, "loss": 2.5805, "step": 26000 }, { "epoch": 2.06, "learning_rate": 0.00021152806062831769, "loss": 2.5407, "step": 26500 }, { "epoch": 2.1, "learning_rate": 0.00020985877875338027, "loss": 2.5537, "step": 27000 }, { "epoch": 2.14, "learning_rate": 0.00020818949687844289, "loss": 2.5253, "step": 27500 }, { "epoch": 2.18, "learning_rate": 0.00020652021500350547, "loss": 2.5401, "step": 28000 }, { "epoch": 2.22, "learning_rate": 0.00020485093312856809, "loss": 2.5245, "step": 28500 }, { "epoch": 2.26, "learning_rate": 0.00020318165125363067, "loss": 2.547, "step": 29000 }, { "epoch": 2.3, "learning_rate": 0.00020151236937869329, "loss": 2.5377, "step": 29500 }, { "epoch": 2.34, "learning_rate": 0.00019984308750375587, "loss": 2.5374, "step": 30000 }, { "epoch": 2.34, "eval_gen_len": 18.8003, "eval_loss": 2.3686139583587646, "eval_rouge1": 27.5061, "eval_rouge2": 13.241, "eval_rougeL": 22.6877, "eval_rougeLsum": 24.4465, "eval_runtime": 629.1948, "eval_samples_per_second": 20.401, "eval_steps_per_second": 2.551, "step": 30000 }, { "epoch": 2.38, "learning_rate": 0.00019817380562881846, "loss": 2.5457, "step": 30500 }, { "epoch": 2.41, "learning_rate": 0.00019650452375388105, "loss": 2.5105, "step": 31000 }, { "epoch": 2.45, "learning_rate": 0.00019483524187894366, "loss": 2.5367, "step": 31500 }, { "epoch": 2.49, "learning_rate": 0.00019316596000400625, "loss": 2.5471, "step": 32000 }, { "epoch": 2.53, "learning_rate": 0.00019149667812906886, "loss": 2.5488, "step": 32500 }, { "epoch": 2.57, "learning_rate": 0.00018982739625413145, "loss": 2.5402, "step": 33000 }, { "epoch": 2.61, "learning_rate": 0.00018815811437919406, "loss": 2.5437, "step": 33500 }, { "epoch": 2.65, "learning_rate": 0.00018648883250425665, "loss": 2.5244, "step": 34000 }, { "epoch": 2.69, "learning_rate": 0.00018481955062931923, "loss": 2.5389, "step": 34500 }, { "epoch": 2.73, "learning_rate": 0.00018315026875438185, "loss": 2.5081, "step": 35000 }, { "epoch": 2.73, "eval_gen_len": 18.7821, "eval_loss": 2.3596315383911133, "eval_rouge1": 27.4715, "eval_rouge2": 13.2862, "eval_rougeL": 22.7022, "eval_rougeLsum": 24.4252, "eval_runtime": 624.9994, "eval_samples_per_second": 20.538, "eval_steps_per_second": 2.568, "step": 35000 }, { "epoch": 2.77, "learning_rate": 0.00018148098687944443, "loss": 2.5425, "step": 35500 }, { "epoch": 2.8, "learning_rate": 0.00017981170500450705, "loss": 2.5245, "step": 36000 }, { "epoch": 2.84, "learning_rate": 0.00017814242312956963, "loss": 2.506, "step": 36500 }, { "epoch": 2.88, "learning_rate": 0.00017647314125463225, "loss": 2.542, "step": 37000 }, { "epoch": 2.92, "learning_rate": 0.00017480385937969483, "loss": 2.5252, "step": 37500 }, { "epoch": 2.96, "learning_rate": 0.00017313457750475745, "loss": 2.5175, "step": 38000 }, { "epoch": 3.0, "learning_rate": 0.00017146529562982003, "loss": 2.5091, "step": 38500 }, { "epoch": 3.04, "learning_rate": 0.00016979601375488265, "loss": 2.4468, "step": 39000 }, { "epoch": 3.08, "learning_rate": 0.00016812673187994523, "loss": 2.447, "step": 39500 }, { "epoch": 3.12, "learning_rate": 0.00016645745000500785, "loss": 2.4152, "step": 40000 }, { "epoch": 3.12, "eval_gen_len": 18.8203, "eval_loss": 2.326728105545044, "eval_rouge1": 27.9595, "eval_rouge2": 13.5813, "eval_rougeL": 23.0493, "eval_rougeLsum": 24.9203, "eval_runtime": 625.9414, "eval_samples_per_second": 20.507, "eval_steps_per_second": 2.564, "step": 40000 }, { "epoch": 3.15, "learning_rate": 0.00016478816813007043, "loss": 2.4379, "step": 40500 }, { "epoch": 3.19, "learning_rate": 0.00016311888625513305, "loss": 2.4489, "step": 41000 }, { "epoch": 3.23, "learning_rate": 0.00016144960438019564, "loss": 2.4212, "step": 41500 }, { "epoch": 3.27, "learning_rate": 0.00015978032250525825, "loss": 2.4203, "step": 42000 }, { "epoch": 3.31, "learning_rate": 0.0001581110406303208, "loss": 2.433, "step": 42500 }, { "epoch": 3.35, "learning_rate": 0.0001564417587553834, "loss": 2.432, "step": 43000 }, { "epoch": 3.39, "learning_rate": 0.000154772476880446, "loss": 2.4299, "step": 43500 }, { "epoch": 3.43, "learning_rate": 0.0001531031950055086, "loss": 2.4201, "step": 44000 }, { "epoch": 3.47, "learning_rate": 0.0001514339131305712, "loss": 2.4314, "step": 44500 }, { "epoch": 3.51, "learning_rate": 0.00014976463125563382, "loss": 2.4387, "step": 45000 }, { "epoch": 3.51, "eval_gen_len": 18.8376, "eval_loss": 2.317692518234253, "eval_rouge1": 28.1616, "eval_rouge2": 13.668, "eval_rougeL": 23.1738, "eval_rougeLsum": 25.0342, "eval_runtime": 622.4609, "eval_samples_per_second": 20.621, "eval_steps_per_second": 2.578, "step": 45000 }, { "epoch": 3.54, "learning_rate": 0.0001480953493806964, "loss": 2.4389, "step": 45500 }, { "epoch": 3.58, "learning_rate": 0.000146426067505759, "loss": 2.4363, "step": 46000 }, { "epoch": 3.62, "learning_rate": 0.0001447567856308216, "loss": 2.4395, "step": 46500 }, { "epoch": 3.66, "learning_rate": 0.0001430875037558842, "loss": 2.4418, "step": 47000 }, { "epoch": 3.7, "learning_rate": 0.0001414182218809468, "loss": 2.4201, "step": 47500 }, { "epoch": 3.74, "learning_rate": 0.0001397489400060094, "loss": 2.4169, "step": 48000 }, { "epoch": 3.78, "learning_rate": 0.000138079658131072, "loss": 2.4009, "step": 48500 }, { "epoch": 3.82, "learning_rate": 0.0001364103762561346, "loss": 2.4279, "step": 49000 }, { "epoch": 3.86, "learning_rate": 0.0001347410943811972, "loss": 2.4307, "step": 49500 }, { "epoch": 3.89, "learning_rate": 0.0001330718125062598, "loss": 2.4387, "step": 50000 }, { "epoch": 3.89, "eval_gen_len": 18.7872, "eval_loss": 2.3016672134399414, "eval_rouge1": 28.1111, "eval_rouge2": 13.569, "eval_rougeL": 23.1312, "eval_rougeLsum": 24.9912, "eval_runtime": 628.1204, "eval_samples_per_second": 20.436, "eval_steps_per_second": 2.555, "step": 50000 }, { "epoch": 3.93, "learning_rate": 0.0001314025306313224, "loss": 2.4143, "step": 50500 }, { "epoch": 3.97, "learning_rate": 0.000129733248756385, "loss": 2.4197, "step": 51000 }, { "epoch": 4.01, "learning_rate": 0.00012806396688144758, "loss": 2.4084, "step": 51500 }, { "epoch": 4.05, "learning_rate": 0.00012639468500651017, "loss": 2.3544, "step": 52000 }, { "epoch": 4.09, "learning_rate": 0.00012472540313157278, "loss": 2.332, "step": 52500 }, { "epoch": 4.13, "learning_rate": 0.00012305612125663537, "loss": 2.3597, "step": 53000 }, { "epoch": 4.17, "learning_rate": 0.00012138683938169798, "loss": 2.3492, "step": 53500 }, { "epoch": 4.21, "learning_rate": 0.00011971755750676058, "loss": 2.3679, "step": 54000 }, { "epoch": 4.25, "learning_rate": 0.00011804827563182318, "loss": 2.3633, "step": 54500 }, { "epoch": 4.28, "learning_rate": 0.00011637899375688578, "loss": 2.3467, "step": 55000 }, { "epoch": 4.28, "eval_gen_len": 18.8334, "eval_loss": 2.3123602867126465, "eval_rouge1": 28.0679, "eval_rouge2": 13.7123, "eval_rougeL": 23.1516, "eval_rougeLsum": 25.0002, "eval_runtime": 622.7792, "eval_samples_per_second": 20.611, "eval_steps_per_second": 2.577, "step": 55000 }, { "epoch": 4.32, "learning_rate": 0.00011470971188194838, "loss": 2.3579, "step": 55500 }, { "epoch": 4.36, "learning_rate": 0.00011304043000701098, "loss": 2.3501, "step": 56000 }, { "epoch": 4.4, "learning_rate": 0.00011137114813207358, "loss": 2.3592, "step": 56500 }, { "epoch": 4.44, "learning_rate": 0.00010970186625713618, "loss": 2.344, "step": 57000 }, { "epoch": 4.48, "learning_rate": 0.00010803258438219876, "loss": 2.3578, "step": 57500 }, { "epoch": 4.52, "learning_rate": 0.00010636330250726136, "loss": 2.3407, "step": 58000 }, { "epoch": 4.56, "learning_rate": 0.00010469402063232396, "loss": 2.3452, "step": 58500 }, { "epoch": 4.6, "learning_rate": 0.00010302473875738656, "loss": 2.3465, "step": 59000 }, { "epoch": 4.64, "learning_rate": 0.00010135545688244916, "loss": 2.3687, "step": 59500 }, { "epoch": 4.67, "learning_rate": 9.968617500751176e-05, "loss": 2.3367, "step": 60000 }, { "epoch": 4.67, "eval_gen_len": 18.835, "eval_loss": 2.2966153621673584, "eval_rouge1": 28.293, "eval_rouge2": 13.9084, "eval_rougeL": 23.3359, "eval_rougeLsum": 25.1789, "eval_runtime": 625.9275, "eval_samples_per_second": 20.507, "eval_steps_per_second": 2.564, "step": 60000 }, { "epoch": 4.71, "learning_rate": 9.801689313257436e-05, "loss": 2.3306, "step": 60500 }, { "epoch": 4.75, "learning_rate": 9.634761125763696e-05, "loss": 2.3497, "step": 61000 }, { "epoch": 4.79, "learning_rate": 9.467832938269956e-05, "loss": 2.3313, "step": 61500 }, { "epoch": 4.83, "learning_rate": 9.300904750776216e-05, "loss": 2.3427, "step": 62000 }, { "epoch": 4.87, "learning_rate": 9.133976563282476e-05, "loss": 2.3259, "step": 62500 }, { "epoch": 4.91, "learning_rate": 8.967048375788736e-05, "loss": 2.3544, "step": 63000 }, { "epoch": 4.95, "learning_rate": 8.800120188294995e-05, "loss": 2.3307, "step": 63500 }, { "epoch": 4.99, "learning_rate": 8.633192000801255e-05, "loss": 2.3477, "step": 64000 }, { "epoch": 5.02, "learning_rate": 8.466263813307513e-05, "loss": 2.3064, "step": 64500 }, { "epoch": 5.06, "learning_rate": 8.299335625813773e-05, "loss": 2.2882, "step": 65000 }, { "epoch": 5.06, "eval_gen_len": 18.7974, "eval_loss": 2.2921857833862305, "eval_rouge1": 28.3828, "eval_rouge2": 14.0129, "eval_rougeL": 23.443, "eval_rougeLsum": 25.3001, "eval_runtime": 621.9943, "eval_samples_per_second": 20.637, "eval_steps_per_second": 2.58, "step": 65000 }, { "epoch": 5.1, "learning_rate": 8.132407438320033e-05, "loss": 2.2733, "step": 65500 }, { "epoch": 5.14, "learning_rate": 7.965479250826293e-05, "loss": 2.2869, "step": 66000 }, { "epoch": 5.18, "learning_rate": 7.798551063332553e-05, "loss": 2.2904, "step": 66500 }, { "epoch": 5.22, "learning_rate": 7.631622875838813e-05, "loss": 2.2516, "step": 67000 }, { "epoch": 5.26, "learning_rate": 7.464694688345073e-05, "loss": 2.2835, "step": 67500 }, { "epoch": 5.3, "learning_rate": 7.297766500851333e-05, "loss": 2.2802, "step": 68000 }, { "epoch": 5.34, "learning_rate": 7.130838313357593e-05, "loss": 2.2779, "step": 68500 }, { "epoch": 5.38, "learning_rate": 6.963910125863853e-05, "loss": 2.274, "step": 69000 }, { "epoch": 5.41, "learning_rate": 6.796981938370112e-05, "loss": 2.2908, "step": 69500 }, { "epoch": 5.45, "learning_rate": 6.630053750876372e-05, "loss": 2.2782, "step": 70000 }, { "epoch": 5.45, "eval_gen_len": 18.8495, "eval_loss": 2.2865357398986816, "eval_rouge1": 28.3987, "eval_rouge2": 13.9705, "eval_rougeL": 23.4227, "eval_rougeLsum": 25.3263, "eval_runtime": 625.1399, "eval_samples_per_second": 20.533, "eval_steps_per_second": 2.567, "step": 70000 }, { "epoch": 5.49, "learning_rate": 6.463125563382632e-05, "loss": 2.2882, "step": 70500 }, { "epoch": 5.53, "learning_rate": 6.296197375888892e-05, "loss": 2.2802, "step": 71000 }, { "epoch": 5.57, "learning_rate": 6.129269188395152e-05, "loss": 2.2774, "step": 71500 }, { "epoch": 5.61, "learning_rate": 5.9623410009014114e-05, "loss": 2.31, "step": 72000 }, { "epoch": 5.65, "learning_rate": 5.7954128134076714e-05, "loss": 2.2829, "step": 72500 }, { "epoch": 5.69, "learning_rate": 5.6284846259139314e-05, "loss": 2.2971, "step": 73000 }, { "epoch": 5.73, "learning_rate": 5.4615564384201915e-05, "loss": 2.2824, "step": 73500 }, { "epoch": 5.76, "learning_rate": 5.294628250926451e-05, "loss": 2.2754, "step": 74000 }, { "epoch": 5.8, "learning_rate": 5.127700063432711e-05, "loss": 2.2893, "step": 74500 }, { "epoch": 5.84, "learning_rate": 4.960771875938971e-05, "loss": 2.2788, "step": 75000 }, { "epoch": 5.84, "eval_gen_len": 18.7967, "eval_loss": 2.278130531311035, "eval_rouge1": 28.4256, "eval_rouge2": 14.0668, "eval_rougeL": 23.4947, "eval_rougeLsum": 25.3403, "eval_runtime": 622.5919, "eval_samples_per_second": 20.617, "eval_steps_per_second": 2.578, "step": 75000 }, { "epoch": 5.88, "learning_rate": 4.793843688445231e-05, "loss": 2.2898, "step": 75500 }, { "epoch": 5.92, "learning_rate": 4.62691550095149e-05, "loss": 2.281, "step": 76000 }, { "epoch": 5.96, "learning_rate": 4.45998731345775e-05, "loss": 2.28, "step": 76500 }, { "epoch": 6.0, "learning_rate": 4.2930591259640095e-05, "loss": 2.269, "step": 77000 }, { "epoch": 6.04, "learning_rate": 4.1261309384702695e-05, "loss": 2.2428, "step": 77500 }, { "epoch": 6.08, "learning_rate": 3.9592027509765295e-05, "loss": 2.2627, "step": 78000 }, { "epoch": 6.12, "learning_rate": 3.7922745634827896e-05, "loss": 2.2219, "step": 78500 }, { "epoch": 6.15, "learning_rate": 3.625346375989049e-05, "loss": 2.218, "step": 79000 }, { "epoch": 6.19, "learning_rate": 3.458418188495309e-05, "loss": 2.2205, "step": 79500 }, { "epoch": 6.23, "learning_rate": 3.291490001001569e-05, "loss": 2.239, "step": 80000 }, { "epoch": 6.23, "eval_gen_len": 18.8384, "eval_loss": 2.2816860675811768, "eval_rouge1": 28.4681, "eval_rouge2": 14.0835, "eval_rougeL": 23.5141, "eval_rougeLsum": 25.4023, "eval_runtime": 624.366, "eval_samples_per_second": 20.558, "eval_steps_per_second": 2.571, "step": 80000 }, { "epoch": 6.27, "learning_rate": 3.124561813507828e-05, "loss": 2.2095, "step": 80500 }, { "epoch": 6.31, "learning_rate": 2.9576336260140883e-05, "loss": 2.2441, "step": 81000 }, { "epoch": 6.35, "learning_rate": 2.7907054385203483e-05, "loss": 2.2292, "step": 81500 }, { "epoch": 6.39, "learning_rate": 2.623777251026608e-05, "loss": 2.2482, "step": 82000 }, { "epoch": 6.43, "learning_rate": 2.456849063532868e-05, "loss": 2.2243, "step": 82500 }, { "epoch": 6.47, "learning_rate": 2.289920876039128e-05, "loss": 2.2177, "step": 83000 }, { "epoch": 6.5, "learning_rate": 2.1229926885453873e-05, "loss": 2.2317, "step": 83500 }, { "epoch": 6.54, "learning_rate": 1.9560645010516473e-05, "loss": 2.2311, "step": 84000 }, { "epoch": 6.58, "learning_rate": 1.7891363135579073e-05, "loss": 2.2446, "step": 84500 }, { "epoch": 6.62, "learning_rate": 1.622208126064167e-05, "loss": 2.2324, "step": 85000 }, { "epoch": 6.62, "eval_gen_len": 18.8369, "eval_loss": 2.27907395362854, "eval_rouge1": 28.5356, "eval_rouge2": 14.1871, "eval_rougeL": 23.5477, "eval_rougeLsum": 25.4279, "eval_runtime": 621.3714, "eval_samples_per_second": 20.658, "eval_steps_per_second": 2.583, "step": 85000 }, { "epoch": 6.66, "learning_rate": 1.4552799385704269e-05, "loss": 2.241, "step": 85500 }, { "epoch": 6.7, "learning_rate": 1.2883517510766869e-05, "loss": 2.2262, "step": 86000 }, { "epoch": 6.74, "learning_rate": 1.1214235635829465e-05, "loss": 2.2456, "step": 86500 }, { "epoch": 6.78, "learning_rate": 9.544953760892062e-06, "loss": 2.2237, "step": 87000 }, { "epoch": 6.82, "learning_rate": 7.875671885954662e-06, "loss": 2.2294, "step": 87500 }, { "epoch": 6.86, "learning_rate": 6.20639001101726e-06, "loss": 2.2184, "step": 88000 }, { "epoch": 6.89, "learning_rate": 4.537108136079858e-06, "loss": 2.2167, "step": 88500 }, { "epoch": 6.93, "learning_rate": 2.867826261142456e-06, "loss": 2.2478, "step": 89000 }, { "epoch": 6.97, "learning_rate": 1.1985443862050543e-06, "loss": 2.2596, "step": 89500 }, { "epoch": 7.0, "step": 89859, "total_flos": 2.6510315204815258e+17, "train_loss": 2.493601185993256, "train_runtime": 53594.8094, "train_samples_per_second": 13.413, "train_steps_per_second": 1.677 } ], "max_steps": 89859, "num_train_epochs": 7, "total_flos": 2.6510315204815258e+17, "trial_name": null, "trial_params": null }