{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.0, "eval_steps": 100, "global_step": 4500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 5.555555555555555e-05, "loss": 41.2711, "step": 10 }, { "epoch": 0.04, "learning_rate": 0.0001111111111111111, "loss": 36.9402, "step": 20 }, { "epoch": 0.06, "learning_rate": 0.00016666666666666666, "loss": 30.5965, "step": 30 }, { "epoch": 0.08, "learning_rate": 0.0002222222222222222, "loss": 25.168, "step": 40 }, { "epoch": 0.1, "learning_rate": 0.0002777777777777778, "loss": 20.2342, "step": 50 }, { "epoch": 0.12, "learning_rate": 0.0003333333333333333, "loss": 15.0822, "step": 60 }, { "epoch": 0.14, "learning_rate": 0.0003888888888888889, "loss": 10.8182, "step": 70 }, { "epoch": 0.16, "learning_rate": 0.0004444444444444444, "loss": 7.5259, "step": 80 }, { "epoch": 0.18, "learning_rate": 0.0005, "loss": 5.6658, "step": 90 }, { "epoch": 0.2, "learning_rate": 0.0004989816700610998, "loss": 4.1956, "step": 100 }, { "epoch": 0.2, "eval_loss": 3.1490368843078613, "eval_rouge1": 0.15401031761230216, "eval_rouge2": 0.06183084855722348, "eval_rougeL": 0.1487638574423534, "eval_rougeLsum": 0.1488931379114581, "eval_runtime": 12.0897, "eval_samples_per_second": 1.654, "eval_steps_per_second": 0.827, "step": 100 }, { "epoch": 0.22, "learning_rate": 0.0004979633401221996, "loss": 3.6249, "step": 110 }, { "epoch": 0.24, "learning_rate": 0.0004969450101832995, "loss": 3.271, "step": 120 }, { "epoch": 0.26, "learning_rate": 0.0004959266802443992, "loss": 3.1977, "step": 130 }, { "epoch": 0.28, "learning_rate": 0.000494908350305499, "loss": 2.8583, "step": 140 }, { "epoch": 0.3, "learning_rate": 0.0004938900203665988, "loss": 2.5156, "step": 150 }, { "epoch": 0.32, "learning_rate": 0.0004928716904276986, "loss": 2.1851, "step": 160 }, { "epoch": 0.34, "learning_rate": 0.0004918533604887983, "loss": 1.9845, "step": 170 }, { "epoch": 0.36, "learning_rate": 0.0004908350305498982, "loss": 1.8303, "step": 180 }, { "epoch": 0.38, "learning_rate": 0.0004898167006109979, "loss": 1.7903, "step": 190 }, { "epoch": 0.4, "learning_rate": 0.0004887983706720978, "loss": 1.7158, "step": 200 }, { "epoch": 0.4, "eval_loss": 1.259326696395874, "eval_rouge1": 0.3038539506809194, "eval_rouge2": 0.1446061100175765, "eval_rougeL": 0.2647138754144953, "eval_rougeLsum": 0.26637210414553936, "eval_runtime": 18.5868, "eval_samples_per_second": 1.076, "eval_steps_per_second": 0.538, "step": 200 }, { "epoch": 0.42, "learning_rate": 0.0004877800407331975, "loss": 1.7002, "step": 210 }, { "epoch": 0.44, "learning_rate": 0.0004867617107942974, "loss": 1.6374, "step": 220 }, { "epoch": 0.46, "learning_rate": 0.00048574338085539715, "loss": 1.7013, "step": 230 }, { "epoch": 0.48, "learning_rate": 0.0004847250509164969, "loss": 1.624, "step": 240 }, { "epoch": 0.5, "learning_rate": 0.0004837067209775968, "loss": 1.6216, "step": 250 }, { "epoch": 0.52, "learning_rate": 0.00048268839103869654, "loss": 1.5784, "step": 260 }, { "epoch": 0.54, "learning_rate": 0.00048167006109979635, "loss": 1.5394, "step": 270 }, { "epoch": 0.56, "learning_rate": 0.00048065173116089617, "loss": 1.5302, "step": 280 }, { "epoch": 0.58, "learning_rate": 0.00047963340122199593, "loss": 1.5071, "step": 290 }, { "epoch": 0.6, "learning_rate": 0.00047861507128309574, "loss": 1.526, "step": 300 }, { "epoch": 0.6, "eval_loss": 1.1715915203094482, "eval_rouge1": 0.29557394630545675, "eval_rouge2": 0.1360179746171492, "eval_rougeL": 0.25515731019715504, "eval_rougeLsum": 0.25695835000784906, "eval_runtime": 27.3621, "eval_samples_per_second": 0.731, "eval_steps_per_second": 0.365, "step": 300 }, { "epoch": 0.62, "learning_rate": 0.0004775967413441955, "loss": 1.4737, "step": 310 }, { "epoch": 0.64, "learning_rate": 0.0004765784114052953, "loss": 1.4955, "step": 320 }, { "epoch": 0.66, "learning_rate": 0.00047556008146639513, "loss": 1.4392, "step": 330 }, { "epoch": 0.68, "learning_rate": 0.0004745417515274949, "loss": 1.4535, "step": 340 }, { "epoch": 0.7, "learning_rate": 0.00047352342158859476, "loss": 1.4625, "step": 350 }, { "epoch": 0.72, "learning_rate": 0.0004725050916496945, "loss": 1.4353, "step": 360 }, { "epoch": 0.74, "learning_rate": 0.0004714867617107943, "loss": 1.4088, "step": 370 }, { "epoch": 0.76, "learning_rate": 0.0004704684317718941, "loss": 1.4472, "step": 380 }, { "epoch": 0.78, "learning_rate": 0.0004694501018329939, "loss": 1.4318, "step": 390 }, { "epoch": 0.8, "learning_rate": 0.0004684317718940937, "loss": 1.4318, "step": 400 }, { "epoch": 0.8, "eval_loss": 1.1233234405517578, "eval_rouge1": 0.33840282066392835, "eval_rouge2": 0.16041046986254748, "eval_rougeL": 0.2758850020825072, "eval_rougeLsum": 0.27526491091998107, "eval_runtime": 25.6589, "eval_samples_per_second": 0.779, "eval_steps_per_second": 0.39, "step": 400 }, { "epoch": 0.82, "learning_rate": 0.0004674134419551935, "loss": 1.3872, "step": 410 }, { "epoch": 0.84, "learning_rate": 0.0004663951120162933, "loss": 1.4539, "step": 420 }, { "epoch": 0.86, "learning_rate": 0.00046537678207739307, "loss": 1.4651, "step": 430 }, { "epoch": 0.88, "learning_rate": 0.0004643584521384929, "loss": 1.3875, "step": 440 }, { "epoch": 0.9, "learning_rate": 0.00046334012219959264, "loss": 1.4463, "step": 450 }, { "epoch": 0.92, "learning_rate": 0.0004623217922606925, "loss": 1.4106, "step": 460 }, { "epoch": 0.94, "learning_rate": 0.00046130346232179227, "loss": 1.3987, "step": 470 }, { "epoch": 0.96, "learning_rate": 0.00046028513238289203, "loss": 1.4239, "step": 480 }, { "epoch": 0.98, "learning_rate": 0.0004592668024439919, "loss": 1.4184, "step": 490 }, { "epoch": 1.0, "learning_rate": 0.00045824847250509166, "loss": 1.438, "step": 500 }, { "epoch": 1.0, "eval_loss": 1.0997542142868042, "eval_rouge1": 0.31808868620058295, "eval_rouge2": 0.15095978898822576, "eval_rougeL": 0.2758976305910347, "eval_rougeLsum": 0.2783141924283456, "eval_runtime": 24.4917, "eval_samples_per_second": 0.817, "eval_steps_per_second": 0.408, "step": 500 }, { "epoch": 1.02, "learning_rate": 0.0004572301425661914, "loss": 1.3892, "step": 510 }, { "epoch": 1.04, "learning_rate": 0.0004562118126272913, "loss": 1.3238, "step": 520 }, { "epoch": 1.06, "learning_rate": 0.00045519348268839105, "loss": 1.3365, "step": 530 }, { "epoch": 1.08, "learning_rate": 0.0004541751527494908, "loss": 1.3769, "step": 540 }, { "epoch": 1.1, "learning_rate": 0.0004531568228105906, "loss": 1.337, "step": 550 }, { "epoch": 1.12, "learning_rate": 0.00045213849287169044, "loss": 1.3606, "step": 560 }, { "epoch": 1.14, "learning_rate": 0.00045112016293279026, "loss": 1.3643, "step": 570 }, { "epoch": 1.16, "learning_rate": 0.00045010183299389, "loss": 1.297, "step": 580 }, { "epoch": 1.18, "learning_rate": 0.00044908350305498983, "loss": 1.3871, "step": 590 }, { "epoch": 1.2, "learning_rate": 0.00044806517311608965, "loss": 1.3163, "step": 600 }, { "epoch": 1.2, "eval_loss": 1.0784223079681396, "eval_rouge1": 0.3334137177907346, "eval_rouge2": 0.15261190778459077, "eval_rougeL": 0.2719649985784092, "eval_rougeLsum": 0.27272497195004963, "eval_runtime": 21.6088, "eval_samples_per_second": 0.926, "eval_steps_per_second": 0.463, "step": 600 }, { "epoch": 1.22, "learning_rate": 0.0004470468431771894, "loss": 1.3417, "step": 610 }, { "epoch": 1.24, "learning_rate": 0.00044602851323828917, "loss": 1.3073, "step": 620 }, { "epoch": 1.26, "learning_rate": 0.00044501018329938904, "loss": 1.3036, "step": 630 }, { "epoch": 1.28, "learning_rate": 0.0004439918533604888, "loss": 1.3535, "step": 640 }, { "epoch": 1.3, "learning_rate": 0.0004429735234215886, "loss": 1.334, "step": 650 }, { "epoch": 1.32, "learning_rate": 0.00044195519348268843, "loss": 1.372, "step": 660 }, { "epoch": 1.34, "learning_rate": 0.0004409368635437882, "loss": 1.3312, "step": 670 }, { "epoch": 1.36, "learning_rate": 0.000439918533604888, "loss": 1.3126, "step": 680 }, { "epoch": 1.38, "learning_rate": 0.00043890020366598776, "loss": 1.3103, "step": 690 }, { "epoch": 1.4, "learning_rate": 0.0004378818737270876, "loss": 1.3625, "step": 700 }, { "epoch": 1.4, "eval_loss": 1.0480375289916992, "eval_rouge1": 0.3252587017685009, "eval_rouge2": 0.14695387868011492, "eval_rougeL": 0.2819448898014987, "eval_rougeLsum": 0.2825983453315859, "eval_runtime": 21.3066, "eval_samples_per_second": 0.939, "eval_steps_per_second": 0.469, "step": 700 }, { "epoch": 1.42, "learning_rate": 0.0004368635437881874, "loss": 1.3607, "step": 710 }, { "epoch": 1.44, "learning_rate": 0.00043584521384928715, "loss": 1.3053, "step": 720 }, { "epoch": 1.46, "learning_rate": 0.00043482688391038697, "loss": 1.306, "step": 730 }, { "epoch": 1.48, "learning_rate": 0.0004338085539714868, "loss": 1.3133, "step": 740 }, { "epoch": 1.5, "learning_rate": 0.00043279022403258654, "loss": 1.3261, "step": 750 }, { "epoch": 1.52, "learning_rate": 0.0004317718940936864, "loss": 1.2707, "step": 760 }, { "epoch": 1.54, "learning_rate": 0.0004307535641547862, "loss": 1.305, "step": 770 }, { "epoch": 1.56, "learning_rate": 0.00042973523421588593, "loss": 1.3298, "step": 780 }, { "epoch": 1.58, "learning_rate": 0.00042871690427698575, "loss": 1.295, "step": 790 }, { "epoch": 1.6, "learning_rate": 0.00042769857433808556, "loss": 1.3147, "step": 800 }, { "epoch": 1.6, "eval_loss": 1.0463201999664307, "eval_rouge1": 0.3472651013247176, "eval_rouge2": 0.16642476533004819, "eval_rougeL": 0.2866317550606935, "eval_rougeLsum": 0.2880956525742435, "eval_runtime": 23.4232, "eval_samples_per_second": 0.854, "eval_steps_per_second": 0.427, "step": 800 }, { "epoch": 1.62, "learning_rate": 0.0004266802443991853, "loss": 1.258, "step": 810 }, { "epoch": 1.64, "learning_rate": 0.00042566191446028514, "loss": 1.3465, "step": 820 }, { "epoch": 1.66, "learning_rate": 0.00042464358452138495, "loss": 1.3383, "step": 830 }, { "epoch": 1.68, "learning_rate": 0.0004236252545824847, "loss": 1.2732, "step": 840 }, { "epoch": 1.7, "learning_rate": 0.00042260692464358453, "loss": 1.2712, "step": 850 }, { "epoch": 1.72, "learning_rate": 0.0004215885947046843, "loss": 1.2621, "step": 860 }, { "epoch": 1.74, "learning_rate": 0.00042057026476578416, "loss": 1.2981, "step": 870 }, { "epoch": 1.76, "learning_rate": 0.0004195519348268839, "loss": 1.3079, "step": 880 }, { "epoch": 1.78, "learning_rate": 0.0004185336048879837, "loss": 1.3029, "step": 890 }, { "epoch": 1.8, "learning_rate": 0.00041751527494908355, "loss": 1.2826, "step": 900 }, { "epoch": 1.8, "eval_loss": 1.0260637998580933, "eval_rouge1": 0.3271562809197811, "eval_rouge2": 0.14988747443733796, "eval_rougeL": 0.27055113042556145, "eval_rougeLsum": 0.27258271275088874, "eval_runtime": 21.6038, "eval_samples_per_second": 0.926, "eval_steps_per_second": 0.463, "step": 900 }, { "epoch": 1.82, "learning_rate": 0.0004164969450101833, "loss": 1.296, "step": 910 }, { "epoch": 1.84, "learning_rate": 0.00041547861507128307, "loss": 1.2451, "step": 920 }, { "epoch": 1.86, "learning_rate": 0.0004144602851323829, "loss": 1.2716, "step": 930 }, { "epoch": 1.88, "learning_rate": 0.0004134419551934827, "loss": 1.2908, "step": 940 }, { "epoch": 1.9, "learning_rate": 0.0004124236252545825, "loss": 1.2433, "step": 950 }, { "epoch": 1.92, "learning_rate": 0.0004114052953156823, "loss": 1.2606, "step": 960 }, { "epoch": 1.94, "learning_rate": 0.0004103869653767821, "loss": 1.2975, "step": 970 }, { "epoch": 1.96, "learning_rate": 0.0004093686354378819, "loss": 1.245, "step": 980 }, { "epoch": 1.98, "learning_rate": 0.00040835030549898167, "loss": 1.2774, "step": 990 }, { "epoch": 2.0, "learning_rate": 0.0004073319755600815, "loss": 1.297, "step": 1000 }, { "epoch": 2.0, "eval_loss": 1.02230966091156, "eval_rouge1": 0.33765263015500535, "eval_rouge2": 0.16173841205100162, "eval_rougeL": 0.2891261055238028, "eval_rougeLsum": 0.2913636739603367, "eval_runtime": 21.7283, "eval_samples_per_second": 0.92, "eval_steps_per_second": 0.46, "step": 1000 }, { "epoch": 2.02, "learning_rate": 0.0004063136456211813, "loss": 1.2437, "step": 1010 }, { "epoch": 2.04, "learning_rate": 0.00040529531568228106, "loss": 1.228, "step": 1020 }, { "epoch": 2.06, "learning_rate": 0.0004042769857433808, "loss": 1.1951, "step": 1030 }, { "epoch": 2.08, "learning_rate": 0.0004032586558044807, "loss": 1.205, "step": 1040 }, { "epoch": 2.1, "learning_rate": 0.00040224032586558045, "loss": 1.2154, "step": 1050 }, { "epoch": 2.12, "learning_rate": 0.00040122199592668026, "loss": 1.2449, "step": 1060 }, { "epoch": 2.14, "learning_rate": 0.0004002036659877801, "loss": 1.2331, "step": 1070 }, { "epoch": 2.16, "learning_rate": 0.00039918533604887984, "loss": 1.1912, "step": 1080 }, { "epoch": 2.18, "learning_rate": 0.00039816700610997965, "loss": 1.2084, "step": 1090 }, { "epoch": 2.2, "learning_rate": 0.0003971486761710794, "loss": 1.2038, "step": 1100 }, { "epoch": 2.2, "eval_loss": 1.014428973197937, "eval_rouge1": 0.32994381772002457, "eval_rouge2": 0.1560313842830252, "eval_rougeL": 0.2757795040195402, "eval_rougeLsum": 0.27717385084380153, "eval_runtime": 21.4325, "eval_samples_per_second": 0.933, "eval_steps_per_second": 0.467, "step": 1100 }, { "epoch": 2.22, "learning_rate": 0.00039613034623217923, "loss": 1.281, "step": 1110 }, { "epoch": 2.24, "learning_rate": 0.00039511201629327904, "loss": 1.1934, "step": 1120 }, { "epoch": 2.26, "learning_rate": 0.0003940936863543788, "loss": 1.2564, "step": 1130 }, { "epoch": 2.28, "learning_rate": 0.0003930753564154787, "loss": 1.2175, "step": 1140 }, { "epoch": 2.3, "learning_rate": 0.00039205702647657843, "loss": 1.2252, "step": 1150 }, { "epoch": 2.32, "learning_rate": 0.0003910386965376782, "loss": 1.2343, "step": 1160 }, { "epoch": 2.34, "learning_rate": 0.000390020366598778, "loss": 1.2715, "step": 1170 }, { "epoch": 2.36, "learning_rate": 0.0003890020366598778, "loss": 1.2275, "step": 1180 }, { "epoch": 2.38, "learning_rate": 0.0003879837067209776, "loss": 1.2482, "step": 1190 }, { "epoch": 2.4, "learning_rate": 0.0003869653767820774, "loss": 1.2617, "step": 1200 }, { "epoch": 2.4, "eval_loss": 0.9945599436759949, "eval_rouge1": 0.34311982189768164, "eval_rouge2": 0.16176698655947228, "eval_rougeL": 0.2933063191954748, "eval_rougeLsum": 0.29522199653692416, "eval_runtime": 20.2751, "eval_samples_per_second": 0.986, "eval_steps_per_second": 0.493, "step": 1200 }, { "epoch": 2.42, "learning_rate": 0.0003859470468431772, "loss": 1.19, "step": 1210 }, { "epoch": 2.44, "learning_rate": 0.000384928716904277, "loss": 1.2462, "step": 1220 }, { "epoch": 2.46, "learning_rate": 0.0003839103869653768, "loss": 1.2387, "step": 1230 }, { "epoch": 2.48, "learning_rate": 0.00038289205702647655, "loss": 1.2516, "step": 1240 }, { "epoch": 2.5, "learning_rate": 0.0003818737270875764, "loss": 1.2543, "step": 1250 }, { "epoch": 2.52, "learning_rate": 0.0003808553971486762, "loss": 1.246, "step": 1260 }, { "epoch": 2.54, "learning_rate": 0.00037983706720977594, "loss": 1.2356, "step": 1270 }, { "epoch": 2.56, "learning_rate": 0.0003788187372708758, "loss": 1.2301, "step": 1280 }, { "epoch": 2.58, "learning_rate": 0.00037780040733197557, "loss": 1.2465, "step": 1290 }, { "epoch": 2.6, "learning_rate": 0.00037678207739307533, "loss": 1.2219, "step": 1300 }, { "epoch": 2.6, "eval_loss": 0.9984191656112671, "eval_rouge1": 0.35530591683468205, "eval_rouge2": 0.1738300131149214, "eval_rougeL": 0.3007508066988732, "eval_rougeLsum": 0.30111433503149587, "eval_runtime": 21.0854, "eval_samples_per_second": 0.949, "eval_steps_per_second": 0.474, "step": 1300 }, { "epoch": 2.62, "learning_rate": 0.0003757637474541752, "loss": 1.2142, "step": 1310 }, { "epoch": 2.64, "learning_rate": 0.00037474541751527496, "loss": 1.1964, "step": 1320 }, { "epoch": 2.66, "learning_rate": 0.0003737270875763747, "loss": 1.2053, "step": 1330 }, { "epoch": 2.68, "learning_rate": 0.00037270875763747454, "loss": 1.2118, "step": 1340 }, { "epoch": 2.7, "learning_rate": 0.00037169042769857435, "loss": 1.1915, "step": 1350 }, { "epoch": 2.72, "learning_rate": 0.00037067209775967417, "loss": 1.2272, "step": 1360 }, { "epoch": 2.74, "learning_rate": 0.00036965376782077393, "loss": 1.1926, "step": 1370 }, { "epoch": 2.76, "learning_rate": 0.00036863543788187374, "loss": 1.2056, "step": 1380 }, { "epoch": 2.78, "learning_rate": 0.00036761710794297356, "loss": 1.1868, "step": 1390 }, { "epoch": 2.8, "learning_rate": 0.0003665987780040733, "loss": 1.1906, "step": 1400 }, { "epoch": 2.8, "eval_loss": 0.9987648129463196, "eval_rouge1": 0.3497044345695397, "eval_rouge2": 0.17488344746541123, "eval_rougeL": 0.2920352923144545, "eval_rougeLsum": 0.2936840461100848, "eval_runtime": 20.5611, "eval_samples_per_second": 0.973, "eval_steps_per_second": 0.486, "step": 1400 }, { "epoch": 2.82, "learning_rate": 0.0003655804480651731, "loss": 1.1981, "step": 1410 }, { "epoch": 2.84, "learning_rate": 0.00036456211812627295, "loss": 1.2079, "step": 1420 }, { "epoch": 2.86, "learning_rate": 0.0003635437881873727, "loss": 1.2036, "step": 1430 }, { "epoch": 2.88, "learning_rate": 0.00036252545824847247, "loss": 1.1892, "step": 1440 }, { "epoch": 2.9, "learning_rate": 0.00036150712830957234, "loss": 1.2223, "step": 1450 }, { "epoch": 2.92, "learning_rate": 0.0003604887983706721, "loss": 1.2214, "step": 1460 }, { "epoch": 2.94, "learning_rate": 0.0003594704684317719, "loss": 1.2206, "step": 1470 }, { "epoch": 2.96, "learning_rate": 0.0003584521384928717, "loss": 1.2218, "step": 1480 }, { "epoch": 2.98, "learning_rate": 0.0003574338085539715, "loss": 1.1856, "step": 1490 }, { "epoch": 3.0, "learning_rate": 0.0003564154786150713, "loss": 1.2303, "step": 1500 }, { "epoch": 3.0, "eval_loss": 0.9856168031692505, "eval_rouge1": 0.3750110269055352, "eval_rouge2": 0.17459799489090816, "eval_rougeL": 0.3003592834064919, "eval_rougeLsum": 0.30119936143252757, "eval_runtime": 23.4082, "eval_samples_per_second": 0.854, "eval_steps_per_second": 0.427, "step": 1500 }, { "epoch": 3.02, "learning_rate": 0.00035539714867617106, "loss": 1.1498, "step": 1510 }, { "epoch": 3.04, "learning_rate": 0.0003543788187372709, "loss": 1.2083, "step": 1520 }, { "epoch": 3.06, "learning_rate": 0.0003533604887983707, "loss": 1.2291, "step": 1530 }, { "epoch": 3.08, "learning_rate": 0.00035234215885947045, "loss": 1.1733, "step": 1540 }, { "epoch": 3.1, "learning_rate": 0.0003513238289205703, "loss": 1.1716, "step": 1550 }, { "epoch": 3.12, "learning_rate": 0.0003503054989816701, "loss": 1.1657, "step": 1560 }, { "epoch": 3.14, "learning_rate": 0.00034928716904276985, "loss": 1.1538, "step": 1570 }, { "epoch": 3.16, "learning_rate": 0.00034826883910386966, "loss": 1.1579, "step": 1580 }, { "epoch": 3.18, "learning_rate": 0.0003472505091649695, "loss": 1.1945, "step": 1590 }, { "epoch": 3.2, "learning_rate": 0.00034623217922606924, "loss": 1.18, "step": 1600 }, { "epoch": 3.2, "eval_loss": 0.9823005795478821, "eval_rouge1": 0.35334084985835634, "eval_rouge2": 0.1773564424695835, "eval_rougeL": 0.30516391982892244, "eval_rougeLsum": 0.30596841516938367, "eval_runtime": 22.0682, "eval_samples_per_second": 0.906, "eval_steps_per_second": 0.453, "step": 1600 }, { "epoch": 3.22, "learning_rate": 0.00034521384928716905, "loss": 1.1782, "step": 1610 }, { "epoch": 3.24, "learning_rate": 0.00034419551934826887, "loss": 1.2039, "step": 1620 }, { "epoch": 3.26, "learning_rate": 0.0003431771894093686, "loss": 1.1961, "step": 1630 }, { "epoch": 3.28, "learning_rate": 0.00034215885947046844, "loss": 1.1846, "step": 1640 }, { "epoch": 3.3, "learning_rate": 0.0003411405295315682, "loss": 1.1776, "step": 1650 }, { "epoch": 3.32, "learning_rate": 0.00034012219959266807, "loss": 1.1549, "step": 1660 }, { "epoch": 3.34, "learning_rate": 0.00033910386965376783, "loss": 1.1442, "step": 1670 }, { "epoch": 3.36, "learning_rate": 0.0003380855397148676, "loss": 1.1672, "step": 1680 }, { "epoch": 3.38, "learning_rate": 0.00033706720977596746, "loss": 1.1854, "step": 1690 }, { "epoch": 3.4, "learning_rate": 0.0003360488798370672, "loss": 1.1435, "step": 1700 }, { "epoch": 3.4, "eval_loss": 0.9792933464050293, "eval_rouge1": 0.3566145348467804, "eval_rouge2": 0.16981457897259283, "eval_rougeL": 0.29555110085672354, "eval_rougeLsum": 0.29638538572800865, "eval_runtime": 20.8074, "eval_samples_per_second": 0.961, "eval_steps_per_second": 0.481, "step": 1700 }, { "epoch": 3.42, "learning_rate": 0.000335030549898167, "loss": 1.1721, "step": 1710 }, { "epoch": 3.44, "learning_rate": 0.0003340122199592668, "loss": 1.1546, "step": 1720 }, { "epoch": 3.46, "learning_rate": 0.0003329938900203666, "loss": 1.1843, "step": 1730 }, { "epoch": 3.48, "learning_rate": 0.00033197556008146637, "loss": 1.1968, "step": 1740 }, { "epoch": 3.5, "learning_rate": 0.0003309572301425662, "loss": 1.1481, "step": 1750 }, { "epoch": 3.52, "learning_rate": 0.000329938900203666, "loss": 1.1473, "step": 1760 }, { "epoch": 3.54, "learning_rate": 0.0003289205702647658, "loss": 1.1729, "step": 1770 }, { "epoch": 3.56, "learning_rate": 0.0003279022403258656, "loss": 1.1401, "step": 1780 }, { "epoch": 3.58, "learning_rate": 0.0003268839103869654, "loss": 1.1602, "step": 1790 }, { "epoch": 3.6, "learning_rate": 0.0003258655804480652, "loss": 1.1473, "step": 1800 }, { "epoch": 3.6, "eval_loss": 0.9619871973991394, "eval_rouge1": 0.36668217495119926, "eval_rouge2": 0.18802901856822518, "eval_rougeL": 0.3009048329724593, "eval_rougeLsum": 0.3020958756940847, "eval_runtime": 20.3135, "eval_samples_per_second": 0.985, "eval_steps_per_second": 0.492, "step": 1800 }, { "epoch": 3.62, "learning_rate": 0.00032484725050916497, "loss": 1.1533, "step": 1810 }, { "epoch": 3.64, "learning_rate": 0.00032382892057026473, "loss": 1.1557, "step": 1820 }, { "epoch": 3.66, "learning_rate": 0.0003228105906313646, "loss": 1.2091, "step": 1830 }, { "epoch": 3.68, "learning_rate": 0.00032179226069246436, "loss": 1.1791, "step": 1840 }, { "epoch": 3.7, "learning_rate": 0.0003207739307535642, "loss": 1.1407, "step": 1850 }, { "epoch": 3.72, "learning_rate": 0.000319755600814664, "loss": 1.1498, "step": 1860 }, { "epoch": 3.74, "learning_rate": 0.00031873727087576375, "loss": 1.1368, "step": 1870 }, { "epoch": 3.76, "learning_rate": 0.00031771894093686356, "loss": 1.1634, "step": 1880 }, { "epoch": 3.78, "learning_rate": 0.0003167006109979633, "loss": 1.168, "step": 1890 }, { "epoch": 3.8, "learning_rate": 0.00031568228105906314, "loss": 1.1588, "step": 1900 }, { "epoch": 3.8, "eval_loss": 0.9718366861343384, "eval_rouge1": 0.36821337929430387, "eval_rouge2": 0.17821834663511393, "eval_rougeL": 0.30543977513475806, "eval_rougeLsum": 0.3075353240284504, "eval_runtime": 20.7174, "eval_samples_per_second": 0.965, "eval_steps_per_second": 0.483, "step": 1900 }, { "epoch": 3.82, "learning_rate": 0.00031466395112016295, "loss": 1.1989, "step": 1910 }, { "epoch": 3.84, "learning_rate": 0.0003136456211812627, "loss": 1.1542, "step": 1920 }, { "epoch": 3.86, "learning_rate": 0.00031262729124236253, "loss": 1.203, "step": 1930 }, { "epoch": 3.88, "learning_rate": 0.00031160896130346234, "loss": 1.1644, "step": 1940 }, { "epoch": 3.9, "learning_rate": 0.0003105906313645621, "loss": 1.1266, "step": 1950 }, { "epoch": 3.92, "learning_rate": 0.0003095723014256619, "loss": 1.1994, "step": 1960 }, { "epoch": 3.94, "learning_rate": 0.00030855397148676173, "loss": 1.1402, "step": 1970 }, { "epoch": 3.96, "learning_rate": 0.0003075356415478615, "loss": 1.1348, "step": 1980 }, { "epoch": 3.98, "learning_rate": 0.0003065173116089613, "loss": 1.1582, "step": 1990 }, { "epoch": 4.0, "learning_rate": 0.0003054989816700611, "loss": 1.127, "step": 2000 }, { "epoch": 4.0, "eval_loss": 0.9626486897468567, "eval_rouge1": 0.33933999922378055, "eval_rouge2": 0.15960640801652384, "eval_rougeL": 0.27841295690954404, "eval_rougeLsum": 0.2805778340104648, "eval_runtime": 20.5994, "eval_samples_per_second": 0.971, "eval_steps_per_second": 0.485, "step": 2000 }, { "epoch": 4.02, "learning_rate": 0.0003044806517311609, "loss": 1.1229, "step": 2010 }, { "epoch": 4.04, "learning_rate": 0.0003034623217922607, "loss": 1.1027, "step": 2020 }, { "epoch": 4.06, "learning_rate": 0.0003024439918533605, "loss": 1.0923, "step": 2030 }, { "epoch": 4.08, "learning_rate": 0.0003014256619144603, "loss": 1.1416, "step": 2040 }, { "epoch": 4.1, "learning_rate": 0.0003004073319755601, "loss": 1.1066, "step": 2050 }, { "epoch": 4.12, "learning_rate": 0.00029938900203665985, "loss": 1.1045, "step": 2060 }, { "epoch": 4.14, "learning_rate": 0.0002983706720977597, "loss": 1.1172, "step": 2070 }, { "epoch": 4.16, "learning_rate": 0.0002973523421588595, "loss": 1.1535, "step": 2080 }, { "epoch": 4.18, "learning_rate": 0.00029633401221995924, "loss": 1.1866, "step": 2090 }, { "epoch": 4.2, "learning_rate": 0.0002953156822810591, "loss": 1.1251, "step": 2100 }, { "epoch": 4.2, "eval_loss": 0.9701215028762817, "eval_rouge1": 0.36918849108015117, "eval_rouge2": 0.1756845524684395, "eval_rougeL": 0.29888162339098345, "eval_rougeLsum": 0.30162046588140573, "eval_runtime": 24.6363, "eval_samples_per_second": 0.812, "eval_steps_per_second": 0.406, "step": 2100 }, { "epoch": 4.22, "learning_rate": 0.00029429735234215887, "loss": 1.1246, "step": 2110 }, { "epoch": 4.24, "learning_rate": 0.00029327902240325863, "loss": 1.1562, "step": 2120 }, { "epoch": 4.26, "learning_rate": 0.00029226069246435845, "loss": 1.1627, "step": 2130 }, { "epoch": 4.28, "learning_rate": 0.00029124236252545826, "loss": 1.1157, "step": 2140 }, { "epoch": 4.3, "learning_rate": 0.0002902240325865581, "loss": 1.1398, "step": 2150 }, { "epoch": 4.32, "learning_rate": 0.00028920570264765784, "loss": 1.1432, "step": 2160 }, { "epoch": 4.34, "learning_rate": 0.00028818737270875765, "loss": 1.1318, "step": 2170 }, { "epoch": 4.36, "learning_rate": 0.00028716904276985747, "loss": 1.1461, "step": 2180 }, { "epoch": 4.38, "learning_rate": 0.00028615071283095723, "loss": 1.13, "step": 2190 }, { "epoch": 4.4, "learning_rate": 0.000285132382892057, "loss": 1.085, "step": 2200 }, { "epoch": 4.4, "eval_loss": 0.9604999423027039, "eval_rouge1": 0.3531685802104979, "eval_rouge2": 0.16500844586291133, "eval_rougeL": 0.28475165873194214, "eval_rougeLsum": 0.2861591101839957, "eval_runtime": 20.5667, "eval_samples_per_second": 0.972, "eval_steps_per_second": 0.486, "step": 2200 }, { "epoch": 4.42, "learning_rate": 0.00028411405295315686, "loss": 1.1408, "step": 2210 }, { "epoch": 4.44, "learning_rate": 0.0002830957230142566, "loss": 1.1458, "step": 2220 }, { "epoch": 4.46, "learning_rate": 0.0002820773930753564, "loss": 1.1056, "step": 2230 }, { "epoch": 4.48, "learning_rate": 0.00028105906313645625, "loss": 1.1933, "step": 2240 }, { "epoch": 4.5, "learning_rate": 0.000280040733197556, "loss": 1.1478, "step": 2250 }, { "epoch": 4.52, "learning_rate": 0.0002790224032586558, "loss": 1.0862, "step": 2260 }, { "epoch": 4.54, "learning_rate": 0.0002780040733197556, "loss": 1.1433, "step": 2270 }, { "epoch": 4.56, "learning_rate": 0.0002769857433808554, "loss": 1.124, "step": 2280 }, { "epoch": 4.58, "learning_rate": 0.0002759674134419552, "loss": 1.095, "step": 2290 }, { "epoch": 4.6, "learning_rate": 0.000274949083503055, "loss": 1.0922, "step": 2300 }, { "epoch": 4.6, "eval_loss": 0.9633736610412598, "eval_rouge1": 0.35626224117845484, "eval_rouge2": 0.16997535406808995, "eval_rougeL": 0.2941903529387039, "eval_rougeLsum": 0.2963873855076279, "eval_runtime": 22.0443, "eval_samples_per_second": 0.907, "eval_steps_per_second": 0.454, "step": 2300 }, { "epoch": 4.62, "learning_rate": 0.0002739307535641548, "loss": 1.1468, "step": 2310 }, { "epoch": 4.64, "learning_rate": 0.0002729124236252546, "loss": 1.1456, "step": 2320 }, { "epoch": 4.66, "learning_rate": 0.00027189409368635437, "loss": 1.1317, "step": 2330 }, { "epoch": 4.68, "learning_rate": 0.0002708757637474542, "loss": 1.1078, "step": 2340 }, { "epoch": 4.7, "learning_rate": 0.000269857433808554, "loss": 1.143, "step": 2350 }, { "epoch": 4.72, "learning_rate": 0.00026883910386965376, "loss": 1.1194, "step": 2360 }, { "epoch": 4.74, "learning_rate": 0.00026782077393075357, "loss": 1.1122, "step": 2370 }, { "epoch": 4.76, "learning_rate": 0.0002668024439918534, "loss": 1.1275, "step": 2380 }, { "epoch": 4.78, "learning_rate": 0.00026578411405295315, "loss": 1.1305, "step": 2390 }, { "epoch": 4.8, "learning_rate": 0.00026476578411405296, "loss": 1.1649, "step": 2400 }, { "epoch": 4.8, "eval_loss": 0.9545726776123047, "eval_rouge1": 0.38101044159436365, "eval_rouge2": 0.1957583700370854, "eval_rougeL": 0.3214174366996265, "eval_rougeLsum": 0.3228583149166323, "eval_runtime": 21.4511, "eval_samples_per_second": 0.932, "eval_steps_per_second": 0.466, "step": 2400 }, { "epoch": 4.82, "learning_rate": 0.0002637474541751528, "loss": 1.104, "step": 2410 }, { "epoch": 4.84, "learning_rate": 0.00026272912423625254, "loss": 1.1482, "step": 2420 }, { "epoch": 4.86, "learning_rate": 0.00026171079429735235, "loss": 1.1361, "step": 2430 }, { "epoch": 4.88, "learning_rate": 0.0002606924643584521, "loss": 1.0924, "step": 2440 }, { "epoch": 4.9, "learning_rate": 0.000259674134419552, "loss": 1.1313, "step": 2450 }, { "epoch": 4.92, "learning_rate": 0.00025865580448065174, "loss": 1.0971, "step": 2460 }, { "epoch": 4.94, "learning_rate": 0.0002576374745417515, "loss": 1.1653, "step": 2470 }, { "epoch": 4.96, "learning_rate": 0.00025661914460285137, "loss": 1.1438, "step": 2480 }, { "epoch": 4.98, "learning_rate": 0.00025560081466395113, "loss": 1.139, "step": 2490 }, { "epoch": 5.0, "learning_rate": 0.0002545824847250509, "loss": 1.1279, "step": 2500 }, { "epoch": 5.0, "eval_loss": 0.9487113952636719, "eval_rouge1": 0.3782997160632432, "eval_rouge2": 0.20289958909161465, "eval_rougeL": 0.3222664292441296, "eval_rougeLsum": 0.3240112049119128, "eval_runtime": 21.5846, "eval_samples_per_second": 0.927, "eval_steps_per_second": 0.463, "step": 2500 }, { "epoch": 5.02, "learning_rate": 0.0002535641547861507, "loss": 1.1028, "step": 2510 }, { "epoch": 5.04, "learning_rate": 0.0002525458248472505, "loss": 1.0659, "step": 2520 }, { "epoch": 5.06, "learning_rate": 0.0002515274949083503, "loss": 1.115, "step": 2530 }, { "epoch": 5.08, "learning_rate": 0.0002505091649694501, "loss": 1.0952, "step": 2540 }, { "epoch": 5.1, "learning_rate": 0.0002494908350305499, "loss": 1.1092, "step": 2550 }, { "epoch": 5.12, "learning_rate": 0.00024847250509164973, "loss": 1.1194, "step": 2560 }, { "epoch": 5.14, "learning_rate": 0.0002474541751527495, "loss": 1.11, "step": 2570 }, { "epoch": 5.16, "learning_rate": 0.0002464358452138493, "loss": 1.1524, "step": 2580 }, { "epoch": 5.18, "learning_rate": 0.0002454175152749491, "loss": 1.1122, "step": 2590 }, { "epoch": 5.2, "learning_rate": 0.0002443991853360489, "loss": 1.0798, "step": 2600 }, { "epoch": 5.2, "eval_loss": 0.9421226382255554, "eval_rouge1": 0.3597269877576089, "eval_rouge2": 0.1775049607682675, "eval_rougeL": 0.2919995300536511, "eval_rougeLsum": 0.29538083346340016, "eval_runtime": 22.4063, "eval_samples_per_second": 0.893, "eval_steps_per_second": 0.446, "step": 2600 }, { "epoch": 5.22, "learning_rate": 0.0002433808553971487, "loss": 1.1141, "step": 2610 }, { "epoch": 5.24, "learning_rate": 0.00024236252545824845, "loss": 1.11, "step": 2620 }, { "epoch": 5.26, "learning_rate": 0.00024134419551934827, "loss": 1.1206, "step": 2630 }, { "epoch": 5.28, "learning_rate": 0.00024032586558044808, "loss": 1.1035, "step": 2640 }, { "epoch": 5.3, "learning_rate": 0.00023930753564154787, "loss": 1.1362, "step": 2650 }, { "epoch": 5.32, "learning_rate": 0.00023828920570264766, "loss": 1.1023, "step": 2660 }, { "epoch": 5.34, "learning_rate": 0.00023727087576374745, "loss": 1.1237, "step": 2670 }, { "epoch": 5.36, "learning_rate": 0.00023625254582484726, "loss": 1.0934, "step": 2680 }, { "epoch": 5.38, "learning_rate": 0.00023523421588594705, "loss": 1.0976, "step": 2690 }, { "epoch": 5.4, "learning_rate": 0.00023421588594704684, "loss": 1.1468, "step": 2700 }, { "epoch": 5.4, "eval_loss": 0.9474976658821106, "eval_rouge1": 0.37719594840753984, "eval_rouge2": 0.19434086595475156, "eval_rougeL": 0.312798697930013, "eval_rougeLsum": 0.3158141266137696, "eval_runtime": 21.5064, "eval_samples_per_second": 0.93, "eval_steps_per_second": 0.465, "step": 2700 }, { "epoch": 5.42, "learning_rate": 0.00023319755600814665, "loss": 1.1283, "step": 2710 }, { "epoch": 5.44, "learning_rate": 0.00023217922606924644, "loss": 1.105, "step": 2720 }, { "epoch": 5.46, "learning_rate": 0.00023116089613034625, "loss": 1.0968, "step": 2730 }, { "epoch": 5.48, "learning_rate": 0.00023014256619144602, "loss": 1.0931, "step": 2740 }, { "epoch": 5.5, "learning_rate": 0.00022912423625254583, "loss": 1.1181, "step": 2750 }, { "epoch": 5.52, "learning_rate": 0.00022810590631364565, "loss": 1.0492, "step": 2760 }, { "epoch": 5.54, "learning_rate": 0.0002270875763747454, "loss": 1.0888, "step": 2770 }, { "epoch": 5.56, "learning_rate": 0.00022606924643584522, "loss": 1.0944, "step": 2780 }, { "epoch": 5.58, "learning_rate": 0.000225050916496945, "loss": 1.0851, "step": 2790 }, { "epoch": 5.6, "learning_rate": 0.00022403258655804482, "loss": 1.073, "step": 2800 }, { "epoch": 5.6, "eval_loss": 0.9429187774658203, "eval_rouge1": 0.3534213889657237, "eval_rouge2": 0.1666029802662448, "eval_rougeL": 0.298122774116486, "eval_rougeLsum": 0.3017447168430196, "eval_runtime": 21.9067, "eval_samples_per_second": 0.913, "eval_steps_per_second": 0.456, "step": 2800 }, { "epoch": 5.62, "learning_rate": 0.00022301425661914458, "loss": 1.1145, "step": 2810 }, { "epoch": 5.64, "learning_rate": 0.0002219959266802444, "loss": 1.081, "step": 2820 }, { "epoch": 5.66, "learning_rate": 0.00022097759674134421, "loss": 1.1057, "step": 2830 }, { "epoch": 5.68, "learning_rate": 0.000219959266802444, "loss": 1.1037, "step": 2840 }, { "epoch": 5.7, "learning_rate": 0.0002189409368635438, "loss": 1.0963, "step": 2850 }, { "epoch": 5.72, "learning_rate": 0.00021792260692464358, "loss": 1.0834, "step": 2860 }, { "epoch": 5.74, "learning_rate": 0.0002169042769857434, "loss": 1.1108, "step": 2870 }, { "epoch": 5.76, "learning_rate": 0.0002158859470468432, "loss": 1.1077, "step": 2880 }, { "epoch": 5.78, "learning_rate": 0.00021486761710794297, "loss": 1.0936, "step": 2890 }, { "epoch": 5.8, "learning_rate": 0.00021384928716904278, "loss": 1.0598, "step": 2900 }, { "epoch": 5.8, "eval_loss": 0.9476393461227417, "eval_rouge1": 0.3460493427394816, "eval_rouge2": 0.16425447886287378, "eval_rougeL": 0.29176256122671795, "eval_rougeLsum": 0.2932874710130029, "eval_runtime": 22.7632, "eval_samples_per_second": 0.879, "eval_steps_per_second": 0.439, "step": 2900 }, { "epoch": 5.82, "learning_rate": 0.00021283095723014257, "loss": 1.1012, "step": 2910 }, { "epoch": 5.84, "learning_rate": 0.00021181262729124236, "loss": 1.1155, "step": 2920 }, { "epoch": 5.86, "learning_rate": 0.00021079429735234215, "loss": 1.0611, "step": 2930 }, { "epoch": 5.88, "learning_rate": 0.00020977596741344196, "loss": 1.1044, "step": 2940 }, { "epoch": 5.9, "learning_rate": 0.00020875763747454178, "loss": 1.0781, "step": 2950 }, { "epoch": 5.92, "learning_rate": 0.00020773930753564154, "loss": 1.118, "step": 2960 }, { "epoch": 5.94, "learning_rate": 0.00020672097759674135, "loss": 1.0767, "step": 2970 }, { "epoch": 5.96, "learning_rate": 0.00020570264765784114, "loss": 1.1183, "step": 2980 }, { "epoch": 5.98, "learning_rate": 0.00020468431771894095, "loss": 1.0675, "step": 2990 }, { "epoch": 6.0, "learning_rate": 0.00020366598778004074, "loss": 1.0567, "step": 3000 }, { "epoch": 6.0, "eval_loss": 0.9474280476570129, "eval_rouge1": 0.3549405529957428, "eval_rouge2": 0.17220677580168336, "eval_rougeL": 0.29622719821503607, "eval_rougeLsum": 0.29765510347367236, "eval_runtime": 21.6142, "eval_samples_per_second": 0.925, "eval_steps_per_second": 0.463, "step": 3000 }, { "epoch": 6.02, "learning_rate": 0.00020264765784114053, "loss": 1.082, "step": 3010 }, { "epoch": 6.04, "learning_rate": 0.00020162932790224034, "loss": 1.0653, "step": 3020 }, { "epoch": 6.06, "learning_rate": 0.00020061099796334013, "loss": 1.0819, "step": 3030 }, { "epoch": 6.08, "learning_rate": 0.00019959266802443992, "loss": 1.0833, "step": 3040 }, { "epoch": 6.1, "learning_rate": 0.0001985743380855397, "loss": 1.0683, "step": 3050 }, { "epoch": 6.12, "learning_rate": 0.00019755600814663952, "loss": 1.0805, "step": 3060 }, { "epoch": 6.14, "learning_rate": 0.00019653767820773934, "loss": 1.0535, "step": 3070 }, { "epoch": 6.16, "learning_rate": 0.0001955193482688391, "loss": 1.0952, "step": 3080 }, { "epoch": 6.18, "learning_rate": 0.0001945010183299389, "loss": 1.0997, "step": 3090 }, { "epoch": 6.2, "learning_rate": 0.0001934826883910387, "loss": 1.0773, "step": 3100 }, { "epoch": 6.2, "eval_loss": 0.9442319869995117, "eval_rouge1": 0.3697494602820765, "eval_rouge2": 0.17309826892569075, "eval_rougeL": 0.30189218077829605, "eval_rougeLsum": 0.3058880236996683, "eval_runtime": 22.8637, "eval_samples_per_second": 0.875, "eval_steps_per_second": 0.437, "step": 3100 }, { "epoch": 6.22, "learning_rate": 0.0001924643584521385, "loss": 1.0739, "step": 3110 }, { "epoch": 6.24, "learning_rate": 0.00019144602851323828, "loss": 1.0964, "step": 3120 }, { "epoch": 6.26, "learning_rate": 0.0001904276985743381, "loss": 1.0914, "step": 3130 }, { "epoch": 6.28, "learning_rate": 0.0001894093686354379, "loss": 1.0836, "step": 3140 }, { "epoch": 6.3, "learning_rate": 0.00018839103869653767, "loss": 1.0812, "step": 3150 }, { "epoch": 6.32, "learning_rate": 0.00018737270875763748, "loss": 1.1139, "step": 3160 }, { "epoch": 6.34, "learning_rate": 0.00018635437881873727, "loss": 1.0565, "step": 3170 }, { "epoch": 6.36, "learning_rate": 0.00018533604887983708, "loss": 1.1065, "step": 3180 }, { "epoch": 6.38, "learning_rate": 0.00018431771894093687, "loss": 1.0972, "step": 3190 }, { "epoch": 6.4, "learning_rate": 0.00018329938900203666, "loss": 1.0704, "step": 3200 }, { "epoch": 6.4, "eval_loss": 0.9415400624275208, "eval_rouge1": 0.3552015796577298, "eval_rouge2": 0.17624288378508868, "eval_rougeL": 0.29158581025602837, "eval_rougeLsum": 0.2947848268366889, "eval_runtime": 21.6045, "eval_samples_per_second": 0.926, "eval_steps_per_second": 0.463, "step": 3200 }, { "epoch": 6.42, "learning_rate": 0.00018228105906313647, "loss": 1.0911, "step": 3210 }, { "epoch": 6.44, "learning_rate": 0.00018126272912423623, "loss": 1.1143, "step": 3220 }, { "epoch": 6.46, "learning_rate": 0.00018024439918533605, "loss": 1.0511, "step": 3230 }, { "epoch": 6.48, "learning_rate": 0.00017922606924643584, "loss": 1.0629, "step": 3240 }, { "epoch": 6.5, "learning_rate": 0.00017820773930753565, "loss": 1.0291, "step": 3250 }, { "epoch": 6.52, "learning_rate": 0.00017718940936863544, "loss": 1.0673, "step": 3260 }, { "epoch": 6.54, "learning_rate": 0.00017617107942973523, "loss": 1.0804, "step": 3270 }, { "epoch": 6.56, "learning_rate": 0.00017515274949083504, "loss": 1.0569, "step": 3280 }, { "epoch": 6.58, "learning_rate": 0.00017413441955193483, "loss": 1.086, "step": 3290 }, { "epoch": 6.6, "learning_rate": 0.00017311608961303462, "loss": 1.0676, "step": 3300 }, { "epoch": 6.6, "eval_loss": 0.9348514676094055, "eval_rouge1": 0.37110363769688715, "eval_rouge2": 0.18311514856569724, "eval_rougeL": 0.3001004216892586, "eval_rougeLsum": 0.30109761355013054, "eval_runtime": 22.0391, "eval_samples_per_second": 0.907, "eval_steps_per_second": 0.454, "step": 3300 }, { "epoch": 6.62, "learning_rate": 0.00017209775967413443, "loss": 1.0646, "step": 3310 }, { "epoch": 6.64, "learning_rate": 0.00017107942973523422, "loss": 1.1062, "step": 3320 }, { "epoch": 6.66, "learning_rate": 0.00017006109979633404, "loss": 1.0522, "step": 3330 }, { "epoch": 6.68, "learning_rate": 0.0001690427698574338, "loss": 1.0598, "step": 3340 }, { "epoch": 6.7, "learning_rate": 0.0001680244399185336, "loss": 1.0777, "step": 3350 }, { "epoch": 6.72, "learning_rate": 0.0001670061099796334, "loss": 1.0967, "step": 3360 }, { "epoch": 6.74, "learning_rate": 0.00016598778004073319, "loss": 1.0654, "step": 3370 }, { "epoch": 6.76, "learning_rate": 0.000164969450101833, "loss": 1.0776, "step": 3380 }, { "epoch": 6.78, "learning_rate": 0.0001639511201629328, "loss": 1.126, "step": 3390 }, { "epoch": 6.8, "learning_rate": 0.0001629327902240326, "loss": 1.1015, "step": 3400 }, { "epoch": 6.8, "eval_loss": 0.9340616464614868, "eval_rouge1": 0.3645773285826771, "eval_rouge2": 0.1797763488186414, "eval_rougeL": 0.3043672589098563, "eval_rougeLsum": 0.30620848729732686, "eval_runtime": 22.359, "eval_samples_per_second": 0.894, "eval_steps_per_second": 0.447, "step": 3400 }, { "epoch": 6.82, "learning_rate": 0.00016191446028513236, "loss": 1.0782, "step": 3410 }, { "epoch": 6.84, "learning_rate": 0.00016089613034623218, "loss": 1.0664, "step": 3420 }, { "epoch": 6.86, "learning_rate": 0.000159877800407332, "loss": 1.0867, "step": 3430 }, { "epoch": 6.88, "learning_rate": 0.00015885947046843178, "loss": 1.0813, "step": 3440 }, { "epoch": 6.9, "learning_rate": 0.00015784114052953157, "loss": 1.0541, "step": 3450 }, { "epoch": 6.92, "learning_rate": 0.00015682281059063136, "loss": 1.0479, "step": 3460 }, { "epoch": 6.94, "learning_rate": 0.00015580448065173117, "loss": 1.0574, "step": 3470 }, { "epoch": 6.96, "learning_rate": 0.00015478615071283096, "loss": 1.0775, "step": 3480 }, { "epoch": 6.98, "learning_rate": 0.00015376782077393075, "loss": 1.0567, "step": 3490 }, { "epoch": 7.0, "learning_rate": 0.00015274949083503056, "loss": 1.0895, "step": 3500 }, { "epoch": 7.0, "eval_loss": 0.9335128664970398, "eval_rouge1": 0.3564737046975759, "eval_rouge2": 0.17665902660761595, "eval_rougeL": 0.2936209010289881, "eval_rougeLsum": 0.29567036214117715, "eval_runtime": 21.7504, "eval_samples_per_second": 0.92, "eval_steps_per_second": 0.46, "step": 3500 }, { "epoch": 7.02, "learning_rate": 0.00015173116089613035, "loss": 1.019, "step": 3510 }, { "epoch": 7.04, "learning_rate": 0.00015071283095723014, "loss": 1.0701, "step": 3520 }, { "epoch": 7.06, "learning_rate": 0.00014969450101832993, "loss": 1.064, "step": 3530 }, { "epoch": 7.08, "learning_rate": 0.00014867617107942974, "loss": 1.0464, "step": 3540 }, { "epoch": 7.1, "learning_rate": 0.00014765784114052956, "loss": 1.0958, "step": 3550 }, { "epoch": 7.12, "learning_rate": 0.00014663951120162932, "loss": 1.0458, "step": 3560 }, { "epoch": 7.14, "learning_rate": 0.00014562118126272913, "loss": 1.0012, "step": 3570 }, { "epoch": 7.16, "learning_rate": 0.00014460285132382892, "loss": 1.0629, "step": 3580 }, { "epoch": 7.18, "learning_rate": 0.00014358452138492873, "loss": 1.049, "step": 3590 }, { "epoch": 7.2, "learning_rate": 0.0001425661914460285, "loss": 1.0839, "step": 3600 }, { "epoch": 7.2, "eval_loss": 0.9262797236442566, "eval_rouge1": 0.3608261849266158, "eval_rouge2": 0.16796265957036072, "eval_rougeL": 0.300587364099427, "eval_rougeLsum": 0.3018268665573097, "eval_runtime": 22.8495, "eval_samples_per_second": 0.875, "eval_steps_per_second": 0.438, "step": 3600 }, { "epoch": 7.22, "learning_rate": 0.0001415478615071283, "loss": 1.0231, "step": 3610 }, { "epoch": 7.24, "learning_rate": 0.00014052953156822812, "loss": 1.043, "step": 3620 }, { "epoch": 7.26, "learning_rate": 0.0001395112016293279, "loss": 1.0423, "step": 3630 }, { "epoch": 7.28, "learning_rate": 0.0001384928716904277, "loss": 1.1131, "step": 3640 }, { "epoch": 7.3, "learning_rate": 0.0001374745417515275, "loss": 1.0106, "step": 3650 }, { "epoch": 7.32, "learning_rate": 0.0001364562118126273, "loss": 1.0663, "step": 3660 }, { "epoch": 7.34, "learning_rate": 0.0001354378818737271, "loss": 1.0842, "step": 3670 }, { "epoch": 7.36, "learning_rate": 0.00013441955193482688, "loss": 1.0482, "step": 3680 }, { "epoch": 7.38, "learning_rate": 0.0001334012219959267, "loss": 1.1137, "step": 3690 }, { "epoch": 7.4, "learning_rate": 0.00013238289205702648, "loss": 1.0737, "step": 3700 }, { "epoch": 7.4, "eval_loss": 0.9295714497566223, "eval_rouge1": 0.35713241736503193, "eval_rouge2": 0.18064337174705725, "eval_rougeL": 0.2977802249700461, "eval_rougeLsum": 0.2990767538657372, "eval_runtime": 21.6594, "eval_samples_per_second": 0.923, "eval_steps_per_second": 0.462, "step": 3700 }, { "epoch": 7.42, "learning_rate": 0.00013136456211812627, "loss": 1.0793, "step": 3710 }, { "epoch": 7.44, "learning_rate": 0.00013034623217922606, "loss": 1.0964, "step": 3720 }, { "epoch": 7.46, "learning_rate": 0.00012932790224032587, "loss": 1.0471, "step": 3730 }, { "epoch": 7.48, "learning_rate": 0.00012830957230142569, "loss": 1.0647, "step": 3740 }, { "epoch": 7.5, "learning_rate": 0.00012729124236252545, "loss": 1.095, "step": 3750 }, { "epoch": 7.52, "learning_rate": 0.00012627291242362526, "loss": 1.0891, "step": 3760 }, { "epoch": 7.54, "learning_rate": 0.00012525458248472505, "loss": 1.0876, "step": 3770 }, { "epoch": 7.56, "learning_rate": 0.00012423625254582486, "loss": 1.0112, "step": 3780 }, { "epoch": 7.58, "learning_rate": 0.00012321792260692465, "loss": 1.0352, "step": 3790 }, { "epoch": 7.6, "learning_rate": 0.00012219959266802444, "loss": 1.0549, "step": 3800 }, { "epoch": 7.6, "eval_loss": 0.9341118931770325, "eval_rouge1": 0.35969569162284754, "eval_rouge2": 0.17023162775203884, "eval_rougeL": 0.28257423918684926, "eval_rougeLsum": 0.28412150462963986, "eval_runtime": 22.2811, "eval_samples_per_second": 0.898, "eval_steps_per_second": 0.449, "step": 3800 }, { "epoch": 7.62, "learning_rate": 0.00012118126272912423, "loss": 1.0636, "step": 3810 }, { "epoch": 7.64, "learning_rate": 0.00012016293279022404, "loss": 1.0719, "step": 3820 }, { "epoch": 7.66, "learning_rate": 0.00011914460285132383, "loss": 1.0451, "step": 3830 }, { "epoch": 7.68, "learning_rate": 0.00011812627291242363, "loss": 1.0509, "step": 3840 }, { "epoch": 7.7, "learning_rate": 0.00011710794297352342, "loss": 1.0515, "step": 3850 }, { "epoch": 7.72, "learning_rate": 0.00011608961303462322, "loss": 1.0525, "step": 3860 }, { "epoch": 7.74, "learning_rate": 0.00011507128309572301, "loss": 1.0751, "step": 3870 }, { "epoch": 7.76, "learning_rate": 0.00011405295315682282, "loss": 1.059, "step": 3880 }, { "epoch": 7.78, "learning_rate": 0.00011303462321792261, "loss": 1.0352, "step": 3890 }, { "epoch": 7.8, "learning_rate": 0.00011201629327902241, "loss": 1.065, "step": 3900 }, { "epoch": 7.8, "eval_loss": 0.9318963885307312, "eval_rouge1": 0.36719535582225143, "eval_rouge2": 0.18487793490577756, "eval_rougeL": 0.3063474590760422, "eval_rougeLsum": 0.3072518409290927, "eval_runtime": 22.5338, "eval_samples_per_second": 0.888, "eval_steps_per_second": 0.444, "step": 3900 }, { "epoch": 7.82, "learning_rate": 0.0001109979633401222, "loss": 1.0591, "step": 3910 }, { "epoch": 7.84, "learning_rate": 0.000109979633401222, "loss": 1.0553, "step": 3920 }, { "epoch": 7.86, "learning_rate": 0.00010896130346232179, "loss": 1.0766, "step": 3930 }, { "epoch": 7.88, "learning_rate": 0.0001079429735234216, "loss": 1.0434, "step": 3940 }, { "epoch": 7.9, "learning_rate": 0.00010692464358452139, "loss": 1.0453, "step": 3950 }, { "epoch": 7.92, "learning_rate": 0.00010590631364562118, "loss": 1.0754, "step": 3960 }, { "epoch": 7.94, "learning_rate": 0.00010488798370672098, "loss": 1.0754, "step": 3970 }, { "epoch": 7.96, "learning_rate": 0.00010386965376782077, "loss": 1.0743, "step": 3980 }, { "epoch": 7.98, "learning_rate": 0.00010285132382892057, "loss": 1.0792, "step": 3990 }, { "epoch": 8.0, "learning_rate": 0.00010183299389002037, "loss": 1.0676, "step": 4000 }, { "epoch": 8.0, "eval_loss": 0.9239376783370972, "eval_rouge1": 0.3708780006445588, "eval_rouge2": 0.18486714155230236, "eval_rougeL": 0.30909528682097154, "eval_rougeLsum": 0.31117436309534663, "eval_runtime": 22.2042, "eval_samples_per_second": 0.901, "eval_steps_per_second": 0.45, "step": 4000 }, { "epoch": 8.02, "learning_rate": 0.00010081466395112017, "loss": 1.0105, "step": 4010 }, { "epoch": 8.04, "learning_rate": 9.979633401221996e-05, "loss": 1.0505, "step": 4020 }, { "epoch": 8.06, "learning_rate": 9.877800407331976e-05, "loss": 1.0375, "step": 4030 }, { "epoch": 8.08, "learning_rate": 9.775967413441955e-05, "loss": 1.0465, "step": 4040 }, { "epoch": 8.1, "learning_rate": 9.674134419551935e-05, "loss": 1.0249, "step": 4050 }, { "epoch": 8.12, "learning_rate": 9.572301425661914e-05, "loss": 1.0806, "step": 4060 }, { "epoch": 8.14, "learning_rate": 9.470468431771895e-05, "loss": 1.0233, "step": 4070 }, { "epoch": 8.16, "learning_rate": 9.368635437881874e-05, "loss": 1.0605, "step": 4080 }, { "epoch": 8.18, "learning_rate": 9.266802443991854e-05, "loss": 1.0336, "step": 4090 }, { "epoch": 8.2, "learning_rate": 9.164969450101833e-05, "loss": 1.0327, "step": 4100 }, { "epoch": 8.2, "eval_loss": 0.9313555955886841, "eval_rouge1": 0.3612621102138621, "eval_rouge2": 0.17914853351838206, "eval_rougeL": 0.3058448839143484, "eval_rougeLsum": 0.30736619803392246, "eval_runtime": 21.8087, "eval_samples_per_second": 0.917, "eval_steps_per_second": 0.459, "step": 4100 }, { "epoch": 8.22, "learning_rate": 9.063136456211812e-05, "loss": 1.0526, "step": 4110 }, { "epoch": 8.24, "learning_rate": 8.961303462321792e-05, "loss": 1.0063, "step": 4120 }, { "epoch": 8.26, "learning_rate": 8.859470468431772e-05, "loss": 1.071, "step": 4130 }, { "epoch": 8.28, "learning_rate": 8.757637474541752e-05, "loss": 1.06, "step": 4140 }, { "epoch": 8.3, "learning_rate": 8.655804480651731e-05, "loss": 1.016, "step": 4150 }, { "epoch": 8.32, "learning_rate": 8.553971486761711e-05, "loss": 1.0764, "step": 4160 }, { "epoch": 8.34, "learning_rate": 8.45213849287169e-05, "loss": 1.0446, "step": 4170 }, { "epoch": 8.36, "learning_rate": 8.35030549898167e-05, "loss": 1.0479, "step": 4180 }, { "epoch": 8.38, "learning_rate": 8.24847250509165e-05, "loss": 1.0283, "step": 4190 }, { "epoch": 8.4, "learning_rate": 8.14663951120163e-05, "loss": 1.0532, "step": 4200 }, { "epoch": 8.4, "eval_loss": 0.9309422373771667, "eval_rouge1": 0.3629265409740602, "eval_rouge2": 0.18256578670757062, "eval_rougeL": 0.30644935758402025, "eval_rougeLsum": 0.3074292385019225, "eval_runtime": 21.7991, "eval_samples_per_second": 0.917, "eval_steps_per_second": 0.459, "step": 4200 }, { "epoch": 8.42, "learning_rate": 8.044806517311609e-05, "loss": 1.0408, "step": 4210 }, { "epoch": 8.44, "learning_rate": 7.942973523421589e-05, "loss": 1.0554, "step": 4220 }, { "epoch": 8.46, "learning_rate": 7.841140529531568e-05, "loss": 1.086, "step": 4230 }, { "epoch": 8.48, "learning_rate": 7.739307535641548e-05, "loss": 1.0316, "step": 4240 }, { "epoch": 8.5, "learning_rate": 7.637474541751528e-05, "loss": 1.052, "step": 4250 }, { "epoch": 8.52, "learning_rate": 7.535641547861507e-05, "loss": 1.0419, "step": 4260 }, { "epoch": 8.54, "learning_rate": 7.433808553971487e-05, "loss": 1.0657, "step": 4270 }, { "epoch": 8.56, "learning_rate": 7.331975560081466e-05, "loss": 1.03, "step": 4280 }, { "epoch": 8.58, "learning_rate": 7.230142566191446e-05, "loss": 1.042, "step": 4290 }, { "epoch": 8.6, "learning_rate": 7.128309572301425e-05, "loss": 1.0649, "step": 4300 }, { "epoch": 8.6, "eval_loss": 0.9269277453422546, "eval_rouge1": 0.3670666365189951, "eval_rouge2": 0.18356209763485815, "eval_rougeL": 0.3012090907355077, "eval_rougeLsum": 0.3039222282541081, "eval_runtime": 22.1694, "eval_samples_per_second": 0.902, "eval_steps_per_second": 0.451, "step": 4300 }, { "epoch": 8.62, "learning_rate": 7.026476578411406e-05, "loss": 1.0671, "step": 4310 }, { "epoch": 8.64, "learning_rate": 6.924643584521385e-05, "loss": 1.0237, "step": 4320 }, { "epoch": 8.66, "learning_rate": 6.822810590631365e-05, "loss": 1.0582, "step": 4330 }, { "epoch": 8.68, "learning_rate": 6.720977596741344e-05, "loss": 1.0325, "step": 4340 }, { "epoch": 8.7, "learning_rate": 6.619144602851324e-05, "loss": 1.0474, "step": 4350 }, { "epoch": 8.72, "learning_rate": 6.517311608961303e-05, "loss": 1.0346, "step": 4360 }, { "epoch": 8.74, "learning_rate": 6.415478615071284e-05, "loss": 1.063, "step": 4370 }, { "epoch": 8.76, "learning_rate": 6.313645621181263e-05, "loss": 1.0337, "step": 4380 }, { "epoch": 8.78, "learning_rate": 6.211812627291243e-05, "loss": 1.0531, "step": 4390 }, { "epoch": 8.8, "learning_rate": 6.109979633401222e-05, "loss": 1.073, "step": 4400 }, { "epoch": 8.8, "eval_loss": 0.9253420829772949, "eval_rouge1": 0.3653533114499009, "eval_rouge2": 0.17889561755188924, "eval_rougeL": 0.30070525024462247, "eval_rougeLsum": 0.3024312386336895, "eval_runtime": 21.8579, "eval_samples_per_second": 0.915, "eval_steps_per_second": 0.458, "step": 4400 }, { "epoch": 8.82, "learning_rate": 6.008146639511202e-05, "loss": 1.0716, "step": 4410 }, { "epoch": 8.84, "learning_rate": 5.9063136456211815e-05, "loss": 1.0522, "step": 4420 }, { "epoch": 8.86, "learning_rate": 5.804480651731161e-05, "loss": 1.0401, "step": 4430 }, { "epoch": 8.88, "learning_rate": 5.702647657841141e-05, "loss": 1.0178, "step": 4440 }, { "epoch": 8.9, "learning_rate": 5.6008146639511206e-05, "loss": 1.1043, "step": 4450 }, { "epoch": 8.92, "learning_rate": 5.4989816700611e-05, "loss": 1.0424, "step": 4460 }, { "epoch": 8.94, "learning_rate": 5.39714867617108e-05, "loss": 1.0599, "step": 4470 }, { "epoch": 8.96, "learning_rate": 5.295315682281059e-05, "loss": 1.0331, "step": 4480 }, { "epoch": 8.98, "learning_rate": 5.1934826883910384e-05, "loss": 1.0467, "step": 4490 }, { "epoch": 9.0, "learning_rate": 5.0916496945010185e-05, "loss": 1.0491, "step": 4500 }, { "epoch": 9.0, "eval_loss": 0.9241307973861694, "eval_rouge1": 0.3664328537702999, "eval_rouge2": 0.18102691382663055, "eval_rougeL": 0.2998899427822319, "eval_rougeLsum": 0.301943630367904, "eval_runtime": 22.1092, "eval_samples_per_second": 0.905, "eval_steps_per_second": 0.452, "step": 4500 } ], "logging_steps": 10, "max_steps": 5000, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 3.0455982194688e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }