|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 9.0, |
|
"eval_steps": 100, |
|
"global_step": 4500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 5.555555555555555e-05, |
|
"loss": 41.2711, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0001111111111111111, |
|
"loss": 36.9402, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00016666666666666666, |
|
"loss": 30.5965, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0002222222222222222, |
|
"loss": 25.168, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0002777777777777778, |
|
"loss": 20.2342, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0003333333333333333, |
|
"loss": 15.0822, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0003888888888888889, |
|
"loss": 10.8182, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0004444444444444444, |
|
"loss": 7.5259, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0005, |
|
"loss": 5.6658, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0004989816700610998, |
|
"loss": 4.1956, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 3.1490368843078613, |
|
"eval_rouge1": 0.15401031761230216, |
|
"eval_rouge2": 0.06183084855722348, |
|
"eval_rougeL": 0.1487638574423534, |
|
"eval_rougeLsum": 0.1488931379114581, |
|
"eval_runtime": 12.0897, |
|
"eval_samples_per_second": 1.654, |
|
"eval_steps_per_second": 0.827, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0004979633401221996, |
|
"loss": 3.6249, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0004969450101832995, |
|
"loss": 3.271, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0004959266802443992, |
|
"loss": 3.1977, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.000494908350305499, |
|
"loss": 2.8583, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0004938900203665988, |
|
"loss": 2.5156, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0004928716904276986, |
|
"loss": 2.1851, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0004918533604887983, |
|
"loss": 1.9845, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0004908350305498982, |
|
"loss": 1.8303, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0004898167006109979, |
|
"loss": 1.7903, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0004887983706720978, |
|
"loss": 1.7158, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 1.259326696395874, |
|
"eval_rouge1": 0.3038539506809194, |
|
"eval_rouge2": 0.1446061100175765, |
|
"eval_rougeL": 0.2647138754144953, |
|
"eval_rougeLsum": 0.26637210414553936, |
|
"eval_runtime": 18.5868, |
|
"eval_samples_per_second": 1.076, |
|
"eval_steps_per_second": 0.538, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0004877800407331975, |
|
"loss": 1.7002, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0004867617107942974, |
|
"loss": 1.6374, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00048574338085539715, |
|
"loss": 1.7013, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0004847250509164969, |
|
"loss": 1.624, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0004837067209775968, |
|
"loss": 1.6216, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00048268839103869654, |
|
"loss": 1.5784, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00048167006109979635, |
|
"loss": 1.5394, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00048065173116089617, |
|
"loss": 1.5302, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00047963340122199593, |
|
"loss": 1.5071, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00047861507128309574, |
|
"loss": 1.526, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 1.1715915203094482, |
|
"eval_rouge1": 0.29557394630545675, |
|
"eval_rouge2": 0.1360179746171492, |
|
"eval_rougeL": 0.25515731019715504, |
|
"eval_rougeLsum": 0.25695835000784906, |
|
"eval_runtime": 27.3621, |
|
"eval_samples_per_second": 0.731, |
|
"eval_steps_per_second": 0.365, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0004775967413441955, |
|
"loss": 1.4737, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0004765784114052953, |
|
"loss": 1.4955, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00047556008146639513, |
|
"loss": 1.4392, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0004745417515274949, |
|
"loss": 1.4535, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00047352342158859476, |
|
"loss": 1.4625, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0004725050916496945, |
|
"loss": 1.4353, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0004714867617107943, |
|
"loss": 1.4088, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.0004704684317718941, |
|
"loss": 1.4472, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0004694501018329939, |
|
"loss": 1.4318, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0004684317718940937, |
|
"loss": 1.4318, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 1.1233234405517578, |
|
"eval_rouge1": 0.33840282066392835, |
|
"eval_rouge2": 0.16041046986254748, |
|
"eval_rougeL": 0.2758850020825072, |
|
"eval_rougeLsum": 0.27526491091998107, |
|
"eval_runtime": 25.6589, |
|
"eval_samples_per_second": 0.779, |
|
"eval_steps_per_second": 0.39, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.0004674134419551935, |
|
"loss": 1.3872, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.0004663951120162933, |
|
"loss": 1.4539, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00046537678207739307, |
|
"loss": 1.4651, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.0004643584521384929, |
|
"loss": 1.3875, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00046334012219959264, |
|
"loss": 1.4463, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.0004623217922606925, |
|
"loss": 1.4106, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00046130346232179227, |
|
"loss": 1.3987, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00046028513238289203, |
|
"loss": 1.4239, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.0004592668024439919, |
|
"loss": 1.4184, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00045824847250509166, |
|
"loss": 1.438, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 1.0997542142868042, |
|
"eval_rouge1": 0.31808868620058295, |
|
"eval_rouge2": 0.15095978898822576, |
|
"eval_rougeL": 0.2758976305910347, |
|
"eval_rougeLsum": 0.2783141924283456, |
|
"eval_runtime": 24.4917, |
|
"eval_samples_per_second": 0.817, |
|
"eval_steps_per_second": 0.408, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.0004572301425661914, |
|
"loss": 1.3892, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.0004562118126272913, |
|
"loss": 1.3238, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.00045519348268839105, |
|
"loss": 1.3365, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.0004541751527494908, |
|
"loss": 1.3769, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.0004531568228105906, |
|
"loss": 1.337, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.00045213849287169044, |
|
"loss": 1.3606, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.00045112016293279026, |
|
"loss": 1.3643, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.00045010183299389, |
|
"loss": 1.297, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00044908350305498983, |
|
"loss": 1.3871, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.00044806517311608965, |
|
"loss": 1.3163, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_loss": 1.0784223079681396, |
|
"eval_rouge1": 0.3334137177907346, |
|
"eval_rouge2": 0.15261190778459077, |
|
"eval_rougeL": 0.2719649985784092, |
|
"eval_rougeLsum": 0.27272497195004963, |
|
"eval_runtime": 21.6088, |
|
"eval_samples_per_second": 0.926, |
|
"eval_steps_per_second": 0.463, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.0004470468431771894, |
|
"loss": 1.3417, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.00044602851323828917, |
|
"loss": 1.3073, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00044501018329938904, |
|
"loss": 1.3036, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.0004439918533604888, |
|
"loss": 1.3535, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.0004429735234215886, |
|
"loss": 1.334, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.00044195519348268843, |
|
"loss": 1.372, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.0004409368635437882, |
|
"loss": 1.3312, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.000439918533604888, |
|
"loss": 1.3126, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.00043890020366598776, |
|
"loss": 1.3103, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.0004378818737270876, |
|
"loss": 1.3625, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_loss": 1.0480375289916992, |
|
"eval_rouge1": 0.3252587017685009, |
|
"eval_rouge2": 0.14695387868011492, |
|
"eval_rougeL": 0.2819448898014987, |
|
"eval_rougeLsum": 0.2825983453315859, |
|
"eval_runtime": 21.3066, |
|
"eval_samples_per_second": 0.939, |
|
"eval_steps_per_second": 0.469, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.0004368635437881874, |
|
"loss": 1.3607, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.00043584521384928715, |
|
"loss": 1.3053, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.00043482688391038697, |
|
"loss": 1.306, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.0004338085539714868, |
|
"loss": 1.3133, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.00043279022403258654, |
|
"loss": 1.3261, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.0004317718940936864, |
|
"loss": 1.2707, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.0004307535641547862, |
|
"loss": 1.305, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.00042973523421588593, |
|
"loss": 1.3298, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.00042871690427698575, |
|
"loss": 1.295, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.00042769857433808556, |
|
"loss": 1.3147, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_loss": 1.0463201999664307, |
|
"eval_rouge1": 0.3472651013247176, |
|
"eval_rouge2": 0.16642476533004819, |
|
"eval_rougeL": 0.2866317550606935, |
|
"eval_rougeLsum": 0.2880956525742435, |
|
"eval_runtime": 23.4232, |
|
"eval_samples_per_second": 0.854, |
|
"eval_steps_per_second": 0.427, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.0004266802443991853, |
|
"loss": 1.258, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.00042566191446028514, |
|
"loss": 1.3465, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.00042464358452138495, |
|
"loss": 1.3383, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.0004236252545824847, |
|
"loss": 1.2732, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.00042260692464358453, |
|
"loss": 1.2712, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.0004215885947046843, |
|
"loss": 1.2621, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.00042057026476578416, |
|
"loss": 1.2981, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.0004195519348268839, |
|
"loss": 1.3079, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.0004185336048879837, |
|
"loss": 1.3029, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.00041751527494908355, |
|
"loss": 1.2826, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_loss": 1.0260637998580933, |
|
"eval_rouge1": 0.3271562809197811, |
|
"eval_rouge2": 0.14988747443733796, |
|
"eval_rougeL": 0.27055113042556145, |
|
"eval_rougeLsum": 0.27258271275088874, |
|
"eval_runtime": 21.6038, |
|
"eval_samples_per_second": 0.926, |
|
"eval_steps_per_second": 0.463, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.0004164969450101833, |
|
"loss": 1.296, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.00041547861507128307, |
|
"loss": 1.2451, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.0004144602851323829, |
|
"loss": 1.2716, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.0004134419551934827, |
|
"loss": 1.2908, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.0004124236252545825, |
|
"loss": 1.2433, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.0004114052953156823, |
|
"loss": 1.2606, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.0004103869653767821, |
|
"loss": 1.2975, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.0004093686354378819, |
|
"loss": 1.245, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.00040835030549898167, |
|
"loss": 1.2774, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.0004073319755600815, |
|
"loss": 1.297, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 1.02230966091156, |
|
"eval_rouge1": 0.33765263015500535, |
|
"eval_rouge2": 0.16173841205100162, |
|
"eval_rougeL": 0.2891261055238028, |
|
"eval_rougeLsum": 0.2913636739603367, |
|
"eval_runtime": 21.7283, |
|
"eval_samples_per_second": 0.92, |
|
"eval_steps_per_second": 0.46, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 0.0004063136456211813, |
|
"loss": 1.2437, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 0.00040529531568228106, |
|
"loss": 1.228, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 0.0004042769857433808, |
|
"loss": 1.1951, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 0.0004032586558044807, |
|
"loss": 1.205, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 0.00040224032586558045, |
|
"loss": 1.2154, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 0.00040122199592668026, |
|
"loss": 1.2449, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 0.0004002036659877801, |
|
"loss": 1.2331, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 0.00039918533604887984, |
|
"loss": 1.1912, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 0.00039816700610997965, |
|
"loss": 1.2084, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 0.0003971486761710794, |
|
"loss": 1.2038, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_loss": 1.014428973197937, |
|
"eval_rouge1": 0.32994381772002457, |
|
"eval_rouge2": 0.1560313842830252, |
|
"eval_rougeL": 0.2757795040195402, |
|
"eval_rougeLsum": 0.27717385084380153, |
|
"eval_runtime": 21.4325, |
|
"eval_samples_per_second": 0.933, |
|
"eval_steps_per_second": 0.467, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 0.00039613034623217923, |
|
"loss": 1.281, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 0.00039511201629327904, |
|
"loss": 1.1934, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 0.0003940936863543788, |
|
"loss": 1.2564, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 0.0003930753564154787, |
|
"loss": 1.2175, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 0.00039205702647657843, |
|
"loss": 1.2252, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 0.0003910386965376782, |
|
"loss": 1.2343, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 0.000390020366598778, |
|
"loss": 1.2715, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 0.0003890020366598778, |
|
"loss": 1.2275, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 0.0003879837067209776, |
|
"loss": 1.2482, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 0.0003869653767820774, |
|
"loss": 1.2617, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_loss": 0.9945599436759949, |
|
"eval_rouge1": 0.34311982189768164, |
|
"eval_rouge2": 0.16176698655947228, |
|
"eval_rougeL": 0.2933063191954748, |
|
"eval_rougeLsum": 0.29522199653692416, |
|
"eval_runtime": 20.2751, |
|
"eval_samples_per_second": 0.986, |
|
"eval_steps_per_second": 0.493, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 0.0003859470468431772, |
|
"loss": 1.19, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 0.000384928716904277, |
|
"loss": 1.2462, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 0.0003839103869653768, |
|
"loss": 1.2387, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 0.00038289205702647655, |
|
"loss": 1.2516, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 0.0003818737270875764, |
|
"loss": 1.2543, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 0.0003808553971486762, |
|
"loss": 1.246, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 0.00037983706720977594, |
|
"loss": 1.2356, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 0.0003788187372708758, |
|
"loss": 1.2301, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 0.00037780040733197557, |
|
"loss": 1.2465, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 0.00037678207739307533, |
|
"loss": 1.2219, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"eval_loss": 0.9984191656112671, |
|
"eval_rouge1": 0.35530591683468205, |
|
"eval_rouge2": 0.1738300131149214, |
|
"eval_rougeL": 0.3007508066988732, |
|
"eval_rougeLsum": 0.30111433503149587, |
|
"eval_runtime": 21.0854, |
|
"eval_samples_per_second": 0.949, |
|
"eval_steps_per_second": 0.474, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 0.0003757637474541752, |
|
"loss": 1.2142, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 0.00037474541751527496, |
|
"loss": 1.1964, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 0.0003737270875763747, |
|
"loss": 1.2053, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 0.00037270875763747454, |
|
"loss": 1.2118, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 0.00037169042769857435, |
|
"loss": 1.1915, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 0.00037067209775967417, |
|
"loss": 1.2272, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 0.00036965376782077393, |
|
"loss": 1.1926, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 0.00036863543788187374, |
|
"loss": 1.2056, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 0.00036761710794297356, |
|
"loss": 1.1868, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 0.0003665987780040733, |
|
"loss": 1.1906, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_loss": 0.9987648129463196, |
|
"eval_rouge1": 0.3497044345695397, |
|
"eval_rouge2": 0.17488344746541123, |
|
"eval_rougeL": 0.2920352923144545, |
|
"eval_rougeLsum": 0.2936840461100848, |
|
"eval_runtime": 20.5611, |
|
"eval_samples_per_second": 0.973, |
|
"eval_steps_per_second": 0.486, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 0.0003655804480651731, |
|
"loss": 1.1981, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 0.00036456211812627295, |
|
"loss": 1.2079, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 0.0003635437881873727, |
|
"loss": 1.2036, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 0.00036252545824847247, |
|
"loss": 1.1892, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 0.00036150712830957234, |
|
"loss": 1.2223, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 0.0003604887983706721, |
|
"loss": 1.2214, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 0.0003594704684317719, |
|
"loss": 1.2206, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 0.0003584521384928717, |
|
"loss": 1.2218, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 0.0003574338085539715, |
|
"loss": 1.1856, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.0003564154786150713, |
|
"loss": 1.2303, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.9856168031692505, |
|
"eval_rouge1": 0.3750110269055352, |
|
"eval_rouge2": 0.17459799489090816, |
|
"eval_rougeL": 0.3003592834064919, |
|
"eval_rougeLsum": 0.30119936143252757, |
|
"eval_runtime": 23.4082, |
|
"eval_samples_per_second": 0.854, |
|
"eval_steps_per_second": 0.427, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 0.00035539714867617106, |
|
"loss": 1.1498, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 0.0003543788187372709, |
|
"loss": 1.2083, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 0.0003533604887983707, |
|
"loss": 1.2291, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 0.00035234215885947045, |
|
"loss": 1.1733, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 0.0003513238289205703, |
|
"loss": 1.1716, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 0.0003503054989816701, |
|
"loss": 1.1657, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 0.00034928716904276985, |
|
"loss": 1.1538, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 0.00034826883910386966, |
|
"loss": 1.1579, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 0.0003472505091649695, |
|
"loss": 1.1945, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 0.00034623217922606924, |
|
"loss": 1.18, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"eval_loss": 0.9823005795478821, |
|
"eval_rouge1": 0.35334084985835634, |
|
"eval_rouge2": 0.1773564424695835, |
|
"eval_rougeL": 0.30516391982892244, |
|
"eval_rougeLsum": 0.30596841516938367, |
|
"eval_runtime": 22.0682, |
|
"eval_samples_per_second": 0.906, |
|
"eval_steps_per_second": 0.453, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 0.00034521384928716905, |
|
"loss": 1.1782, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 0.00034419551934826887, |
|
"loss": 1.2039, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 0.0003431771894093686, |
|
"loss": 1.1961, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 0.00034215885947046844, |
|
"loss": 1.1846, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 0.0003411405295315682, |
|
"loss": 1.1776, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 0.00034012219959266807, |
|
"loss": 1.1549, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 0.00033910386965376783, |
|
"loss": 1.1442, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 0.0003380855397148676, |
|
"loss": 1.1672, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 0.00033706720977596746, |
|
"loss": 1.1854, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 0.0003360488798370672, |
|
"loss": 1.1435, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"eval_loss": 0.9792933464050293, |
|
"eval_rouge1": 0.3566145348467804, |
|
"eval_rouge2": 0.16981457897259283, |
|
"eval_rougeL": 0.29555110085672354, |
|
"eval_rougeLsum": 0.29638538572800865, |
|
"eval_runtime": 20.8074, |
|
"eval_samples_per_second": 0.961, |
|
"eval_steps_per_second": 0.481, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 0.000335030549898167, |
|
"loss": 1.1721, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 0.0003340122199592668, |
|
"loss": 1.1546, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 0.0003329938900203666, |
|
"loss": 1.1843, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 0.00033197556008146637, |
|
"loss": 1.1968, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 0.0003309572301425662, |
|
"loss": 1.1481, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 0.000329938900203666, |
|
"loss": 1.1473, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 0.0003289205702647658, |
|
"loss": 1.1729, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 0.0003279022403258656, |
|
"loss": 1.1401, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 0.0003268839103869654, |
|
"loss": 1.1602, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 0.0003258655804480652, |
|
"loss": 1.1473, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"eval_loss": 0.9619871973991394, |
|
"eval_rouge1": 0.36668217495119926, |
|
"eval_rouge2": 0.18802901856822518, |
|
"eval_rougeL": 0.3009048329724593, |
|
"eval_rougeLsum": 0.3020958756940847, |
|
"eval_runtime": 20.3135, |
|
"eval_samples_per_second": 0.985, |
|
"eval_steps_per_second": 0.492, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 0.00032484725050916497, |
|
"loss": 1.1533, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 0.00032382892057026473, |
|
"loss": 1.1557, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 0.0003228105906313646, |
|
"loss": 1.2091, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 0.00032179226069246436, |
|
"loss": 1.1791, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 0.0003207739307535642, |
|
"loss": 1.1407, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 0.000319755600814664, |
|
"loss": 1.1498, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 0.00031873727087576375, |
|
"loss": 1.1368, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 0.00031771894093686356, |
|
"loss": 1.1634, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 0.0003167006109979633, |
|
"loss": 1.168, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 0.00031568228105906314, |
|
"loss": 1.1588, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"eval_loss": 0.9718366861343384, |
|
"eval_rouge1": 0.36821337929430387, |
|
"eval_rouge2": 0.17821834663511393, |
|
"eval_rougeL": 0.30543977513475806, |
|
"eval_rougeLsum": 0.3075353240284504, |
|
"eval_runtime": 20.7174, |
|
"eval_samples_per_second": 0.965, |
|
"eval_steps_per_second": 0.483, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 0.00031466395112016295, |
|
"loss": 1.1989, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 0.0003136456211812627, |
|
"loss": 1.1542, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 0.00031262729124236253, |
|
"loss": 1.203, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 0.00031160896130346234, |
|
"loss": 1.1644, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 0.0003105906313645621, |
|
"loss": 1.1266, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 0.0003095723014256619, |
|
"loss": 1.1994, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 0.00030855397148676173, |
|
"loss": 1.1402, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 0.0003075356415478615, |
|
"loss": 1.1348, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 0.0003065173116089613, |
|
"loss": 1.1582, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.0003054989816700611, |
|
"loss": 1.127, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.9626486897468567, |
|
"eval_rouge1": 0.33933999922378055, |
|
"eval_rouge2": 0.15960640801652384, |
|
"eval_rougeL": 0.27841295690954404, |
|
"eval_rougeLsum": 0.2805778340104648, |
|
"eval_runtime": 20.5994, |
|
"eval_samples_per_second": 0.971, |
|
"eval_steps_per_second": 0.485, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 0.0003044806517311609, |
|
"loss": 1.1229, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 0.0003034623217922607, |
|
"loss": 1.1027, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 0.0003024439918533605, |
|
"loss": 1.0923, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 0.0003014256619144603, |
|
"loss": 1.1416, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 0.0003004073319755601, |
|
"loss": 1.1066, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 0.00029938900203665985, |
|
"loss": 1.1045, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 0.0002983706720977597, |
|
"loss": 1.1172, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 0.0002973523421588595, |
|
"loss": 1.1535, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 0.00029633401221995924, |
|
"loss": 1.1866, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 0.0002953156822810591, |
|
"loss": 1.1251, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"eval_loss": 0.9701215028762817, |
|
"eval_rouge1": 0.36918849108015117, |
|
"eval_rouge2": 0.1756845524684395, |
|
"eval_rougeL": 0.29888162339098345, |
|
"eval_rougeLsum": 0.30162046588140573, |
|
"eval_runtime": 24.6363, |
|
"eval_samples_per_second": 0.812, |
|
"eval_steps_per_second": 0.406, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 0.00029429735234215887, |
|
"loss": 1.1246, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 0.00029327902240325863, |
|
"loss": 1.1562, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 0.00029226069246435845, |
|
"loss": 1.1627, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 0.00029124236252545826, |
|
"loss": 1.1157, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 0.0002902240325865581, |
|
"loss": 1.1398, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 0.00028920570264765784, |
|
"loss": 1.1432, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 0.00028818737270875765, |
|
"loss": 1.1318, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 0.00028716904276985747, |
|
"loss": 1.1461, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 0.00028615071283095723, |
|
"loss": 1.13, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 0.000285132382892057, |
|
"loss": 1.085, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"eval_loss": 0.9604999423027039, |
|
"eval_rouge1": 0.3531685802104979, |
|
"eval_rouge2": 0.16500844586291133, |
|
"eval_rougeL": 0.28475165873194214, |
|
"eval_rougeLsum": 0.2861591101839957, |
|
"eval_runtime": 20.5667, |
|
"eval_samples_per_second": 0.972, |
|
"eval_steps_per_second": 0.486, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 0.00028411405295315686, |
|
"loss": 1.1408, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 0.0002830957230142566, |
|
"loss": 1.1458, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 0.0002820773930753564, |
|
"loss": 1.1056, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 0.00028105906313645625, |
|
"loss": 1.1933, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 0.000280040733197556, |
|
"loss": 1.1478, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 0.0002790224032586558, |
|
"loss": 1.0862, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 0.0002780040733197556, |
|
"loss": 1.1433, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 0.0002769857433808554, |
|
"loss": 1.124, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 0.0002759674134419552, |
|
"loss": 1.095, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 0.000274949083503055, |
|
"loss": 1.0922, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"eval_loss": 0.9633736610412598, |
|
"eval_rouge1": 0.35626224117845484, |
|
"eval_rouge2": 0.16997535406808995, |
|
"eval_rougeL": 0.2941903529387039, |
|
"eval_rougeLsum": 0.2963873855076279, |
|
"eval_runtime": 22.0443, |
|
"eval_samples_per_second": 0.907, |
|
"eval_steps_per_second": 0.454, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 0.0002739307535641548, |
|
"loss": 1.1468, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 0.0002729124236252546, |
|
"loss": 1.1456, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 0.00027189409368635437, |
|
"loss": 1.1317, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 0.0002708757637474542, |
|
"loss": 1.1078, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 0.000269857433808554, |
|
"loss": 1.143, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 0.00026883910386965376, |
|
"loss": 1.1194, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 0.00026782077393075357, |
|
"loss": 1.1122, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 0.0002668024439918534, |
|
"loss": 1.1275, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 0.00026578411405295315, |
|
"loss": 1.1305, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 0.00026476578411405296, |
|
"loss": 1.1649, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"eval_loss": 0.9545726776123047, |
|
"eval_rouge1": 0.38101044159436365, |
|
"eval_rouge2": 0.1957583700370854, |
|
"eval_rougeL": 0.3214174366996265, |
|
"eval_rougeLsum": 0.3228583149166323, |
|
"eval_runtime": 21.4511, |
|
"eval_samples_per_second": 0.932, |
|
"eval_steps_per_second": 0.466, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"learning_rate": 0.0002637474541751528, |
|
"loss": 1.104, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 0.00026272912423625254, |
|
"loss": 1.1482, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 0.00026171079429735235, |
|
"loss": 1.1361, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 0.0002606924643584521, |
|
"loss": 1.0924, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 0.000259674134419552, |
|
"loss": 1.1313, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 0.00025865580448065174, |
|
"loss": 1.0971, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 0.0002576374745417515, |
|
"loss": 1.1653, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 0.00025661914460285137, |
|
"loss": 1.1438, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 0.00025560081466395113, |
|
"loss": 1.139, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.0002545824847250509, |
|
"loss": 1.1279, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.9487113952636719, |
|
"eval_rouge1": 0.3782997160632432, |
|
"eval_rouge2": 0.20289958909161465, |
|
"eval_rougeL": 0.3222664292441296, |
|
"eval_rougeLsum": 0.3240112049119128, |
|
"eval_runtime": 21.5846, |
|
"eval_samples_per_second": 0.927, |
|
"eval_steps_per_second": 0.463, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 0.0002535641547861507, |
|
"loss": 1.1028, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"learning_rate": 0.0002525458248472505, |
|
"loss": 1.0659, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 0.0002515274949083503, |
|
"loss": 1.115, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 0.0002505091649694501, |
|
"loss": 1.0952, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 0.0002494908350305499, |
|
"loss": 1.1092, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"learning_rate": 0.00024847250509164973, |
|
"loss": 1.1194, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"learning_rate": 0.0002474541751527495, |
|
"loss": 1.11, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"learning_rate": 0.0002464358452138493, |
|
"loss": 1.1524, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 5.18, |
|
"learning_rate": 0.0002454175152749491, |
|
"loss": 1.1122, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 0.0002443991853360489, |
|
"loss": 1.0798, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"eval_loss": 0.9421226382255554, |
|
"eval_rouge1": 0.3597269877576089, |
|
"eval_rouge2": 0.1775049607682675, |
|
"eval_rougeL": 0.2919995300536511, |
|
"eval_rougeLsum": 0.29538083346340016, |
|
"eval_runtime": 22.4063, |
|
"eval_samples_per_second": 0.893, |
|
"eval_steps_per_second": 0.446, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 0.0002433808553971487, |
|
"loss": 1.1141, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 0.00024236252545824845, |
|
"loss": 1.11, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"learning_rate": 0.00024134419551934827, |
|
"loss": 1.1206, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"learning_rate": 0.00024032586558044808, |
|
"loss": 1.1035, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 0.00023930753564154787, |
|
"loss": 1.1362, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"learning_rate": 0.00023828920570264766, |
|
"loss": 1.1023, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 0.00023727087576374745, |
|
"loss": 1.1237, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"learning_rate": 0.00023625254582484726, |
|
"loss": 1.0934, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"learning_rate": 0.00023523421588594705, |
|
"loss": 1.0976, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 0.00023421588594704684, |
|
"loss": 1.1468, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"eval_loss": 0.9474976658821106, |
|
"eval_rouge1": 0.37719594840753984, |
|
"eval_rouge2": 0.19434086595475156, |
|
"eval_rougeL": 0.312798697930013, |
|
"eval_rougeLsum": 0.3158141266137696, |
|
"eval_runtime": 21.5064, |
|
"eval_samples_per_second": 0.93, |
|
"eval_steps_per_second": 0.465, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"learning_rate": 0.00023319755600814665, |
|
"loss": 1.1283, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"learning_rate": 0.00023217922606924644, |
|
"loss": 1.105, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"learning_rate": 0.00023116089613034625, |
|
"loss": 1.0968, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"learning_rate": 0.00023014256619144602, |
|
"loss": 1.0931, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"learning_rate": 0.00022912423625254583, |
|
"loss": 1.1181, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"learning_rate": 0.00022810590631364565, |
|
"loss": 1.0492, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"learning_rate": 0.0002270875763747454, |
|
"loss": 1.0888, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 0.00022606924643584522, |
|
"loss": 1.0944, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 5.58, |
|
"learning_rate": 0.000225050916496945, |
|
"loss": 1.0851, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 0.00022403258655804482, |
|
"loss": 1.073, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"eval_loss": 0.9429187774658203, |
|
"eval_rouge1": 0.3534213889657237, |
|
"eval_rouge2": 0.1666029802662448, |
|
"eval_rougeL": 0.298122774116486, |
|
"eval_rougeLsum": 0.3017447168430196, |
|
"eval_runtime": 21.9067, |
|
"eval_samples_per_second": 0.913, |
|
"eval_steps_per_second": 0.456, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"learning_rate": 0.00022301425661914458, |
|
"loss": 1.1145, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"learning_rate": 0.0002219959266802444, |
|
"loss": 1.081, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 0.00022097759674134421, |
|
"loss": 1.1057, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"learning_rate": 0.000219959266802444, |
|
"loss": 1.1037, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 0.0002189409368635438, |
|
"loss": 1.0963, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 5.72, |
|
"learning_rate": 0.00021792260692464358, |
|
"loss": 1.0834, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"learning_rate": 0.0002169042769857434, |
|
"loss": 1.1108, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"learning_rate": 0.0002158859470468432, |
|
"loss": 1.1077, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 5.78, |
|
"learning_rate": 0.00021486761710794297, |
|
"loss": 1.0936, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 0.00021384928716904278, |
|
"loss": 1.0598, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"eval_loss": 0.9476393461227417, |
|
"eval_rouge1": 0.3460493427394816, |
|
"eval_rouge2": 0.16425447886287378, |
|
"eval_rougeL": 0.29176256122671795, |
|
"eval_rougeLsum": 0.2932874710130029, |
|
"eval_runtime": 22.7632, |
|
"eval_samples_per_second": 0.879, |
|
"eval_steps_per_second": 0.439, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"learning_rate": 0.00021283095723014257, |
|
"loss": 1.1012, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"learning_rate": 0.00021181262729124236, |
|
"loss": 1.1155, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 0.00021079429735234215, |
|
"loss": 1.0611, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"learning_rate": 0.00020977596741344196, |
|
"loss": 1.1044, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"learning_rate": 0.00020875763747454178, |
|
"loss": 1.0781, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"learning_rate": 0.00020773930753564154, |
|
"loss": 1.118, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"learning_rate": 0.00020672097759674135, |
|
"loss": 1.0767, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"learning_rate": 0.00020570264765784114, |
|
"loss": 1.1183, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 5.98, |
|
"learning_rate": 0.00020468431771894095, |
|
"loss": 1.0675, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 0.00020366598778004074, |
|
"loss": 1.0567, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.9474280476570129, |
|
"eval_rouge1": 0.3549405529957428, |
|
"eval_rouge2": 0.17220677580168336, |
|
"eval_rougeL": 0.29622719821503607, |
|
"eval_rougeLsum": 0.29765510347367236, |
|
"eval_runtime": 21.6142, |
|
"eval_samples_per_second": 0.925, |
|
"eval_steps_per_second": 0.463, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 0.00020264765784114053, |
|
"loss": 1.082, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 0.00020162932790224034, |
|
"loss": 1.0653, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 0.00020061099796334013, |
|
"loss": 1.0819, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 0.00019959266802443992, |
|
"loss": 1.0833, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 0.0001985743380855397, |
|
"loss": 1.0683, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 0.00019755600814663952, |
|
"loss": 1.0805, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 6.14, |
|
"learning_rate": 0.00019653767820773934, |
|
"loss": 1.0535, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"learning_rate": 0.0001955193482688391, |
|
"loss": 1.0952, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"learning_rate": 0.0001945010183299389, |
|
"loss": 1.0997, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 0.0001934826883910387, |
|
"loss": 1.0773, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"eval_loss": 0.9442319869995117, |
|
"eval_rouge1": 0.3697494602820765, |
|
"eval_rouge2": 0.17309826892569075, |
|
"eval_rougeL": 0.30189218077829605, |
|
"eval_rougeLsum": 0.3058880236996683, |
|
"eval_runtime": 22.8637, |
|
"eval_samples_per_second": 0.875, |
|
"eval_steps_per_second": 0.437, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 6.22, |
|
"learning_rate": 0.0001924643584521385, |
|
"loss": 1.0739, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"learning_rate": 0.00019144602851323828, |
|
"loss": 1.0964, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 0.0001904276985743381, |
|
"loss": 1.0914, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"learning_rate": 0.0001894093686354379, |
|
"loss": 1.0836, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"learning_rate": 0.00018839103869653767, |
|
"loss": 1.0812, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"learning_rate": 0.00018737270875763748, |
|
"loss": 1.1139, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 6.34, |
|
"learning_rate": 0.00018635437881873727, |
|
"loss": 1.0565, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"learning_rate": 0.00018533604887983708, |
|
"loss": 1.1065, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 0.00018431771894093687, |
|
"loss": 1.0972, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 0.00018329938900203666, |
|
"loss": 1.0704, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"eval_loss": 0.9415400624275208, |
|
"eval_rouge1": 0.3552015796577298, |
|
"eval_rouge2": 0.17624288378508868, |
|
"eval_rougeL": 0.29158581025602837, |
|
"eval_rougeLsum": 0.2947848268366889, |
|
"eval_runtime": 21.6045, |
|
"eval_samples_per_second": 0.926, |
|
"eval_steps_per_second": 0.463, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"learning_rate": 0.00018228105906313647, |
|
"loss": 1.0911, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"learning_rate": 0.00018126272912423623, |
|
"loss": 1.1143, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 6.46, |
|
"learning_rate": 0.00018024439918533605, |
|
"loss": 1.0511, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"learning_rate": 0.00017922606924643584, |
|
"loss": 1.0629, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"learning_rate": 0.00017820773930753565, |
|
"loss": 1.0291, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 0.00017718940936863544, |
|
"loss": 1.0673, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"learning_rate": 0.00017617107942973523, |
|
"loss": 1.0804, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"learning_rate": 0.00017515274949083504, |
|
"loss": 1.0569, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"learning_rate": 0.00017413441955193483, |
|
"loss": 1.086, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 0.00017311608961303462, |
|
"loss": 1.0676, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"eval_loss": 0.9348514676094055, |
|
"eval_rouge1": 0.37110363769688715, |
|
"eval_rouge2": 0.18311514856569724, |
|
"eval_rougeL": 0.3001004216892586, |
|
"eval_rougeLsum": 0.30109761355013054, |
|
"eval_runtime": 22.0391, |
|
"eval_samples_per_second": 0.907, |
|
"eval_steps_per_second": 0.454, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 6.62, |
|
"learning_rate": 0.00017209775967413443, |
|
"loss": 1.0646, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"learning_rate": 0.00017107942973523422, |
|
"loss": 1.1062, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 6.66, |
|
"learning_rate": 0.00017006109979633404, |
|
"loss": 1.0522, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 6.68, |
|
"learning_rate": 0.0001690427698574338, |
|
"loss": 1.0598, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"learning_rate": 0.0001680244399185336, |
|
"loss": 1.0777, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"learning_rate": 0.0001670061099796334, |
|
"loss": 1.0967, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 0.00016598778004073319, |
|
"loss": 1.0654, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"learning_rate": 0.000164969450101833, |
|
"loss": 1.0776, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 0.0001639511201629328, |
|
"loss": 1.126, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 0.0001629327902240326, |
|
"loss": 1.1015, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"eval_loss": 0.9340616464614868, |
|
"eval_rouge1": 0.3645773285826771, |
|
"eval_rouge2": 0.1797763488186414, |
|
"eval_rougeL": 0.3043672589098563, |
|
"eval_rougeLsum": 0.30620848729732686, |
|
"eval_runtime": 22.359, |
|
"eval_samples_per_second": 0.894, |
|
"eval_steps_per_second": 0.447, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 6.82, |
|
"learning_rate": 0.00016191446028513236, |
|
"loss": 1.0782, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"learning_rate": 0.00016089613034623218, |
|
"loss": 1.0664, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 6.86, |
|
"learning_rate": 0.000159877800407332, |
|
"loss": 1.0867, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"learning_rate": 0.00015885947046843178, |
|
"loss": 1.0813, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 0.00015784114052953157, |
|
"loss": 1.0541, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 6.92, |
|
"learning_rate": 0.00015682281059063136, |
|
"loss": 1.0479, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"learning_rate": 0.00015580448065173117, |
|
"loss": 1.0574, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"learning_rate": 0.00015478615071283096, |
|
"loss": 1.0775, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"learning_rate": 0.00015376782077393075, |
|
"loss": 1.0567, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 0.00015274949083503056, |
|
"loss": 1.0895, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 0.9335128664970398, |
|
"eval_rouge1": 0.3564737046975759, |
|
"eval_rouge2": 0.17665902660761595, |
|
"eval_rougeL": 0.2936209010289881, |
|
"eval_rougeLsum": 0.29567036214117715, |
|
"eval_runtime": 21.7504, |
|
"eval_samples_per_second": 0.92, |
|
"eval_steps_per_second": 0.46, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 0.00015173116089613035, |
|
"loss": 1.019, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"learning_rate": 0.00015071283095723014, |
|
"loss": 1.0701, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"learning_rate": 0.00014969450101832993, |
|
"loss": 1.064, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"learning_rate": 0.00014867617107942974, |
|
"loss": 1.0464, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 0.00014765784114052956, |
|
"loss": 1.0958, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"learning_rate": 0.00014663951120162932, |
|
"loss": 1.0458, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 0.00014562118126272913, |
|
"loss": 1.0012, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 7.16, |
|
"learning_rate": 0.00014460285132382892, |
|
"loss": 1.0629, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 7.18, |
|
"learning_rate": 0.00014358452138492873, |
|
"loss": 1.049, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 0.0001425661914460285, |
|
"loss": 1.0839, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"eval_loss": 0.9262797236442566, |
|
"eval_rouge1": 0.3608261849266158, |
|
"eval_rouge2": 0.16796265957036072, |
|
"eval_rougeL": 0.300587364099427, |
|
"eval_rougeLsum": 0.3018268665573097, |
|
"eval_runtime": 22.8495, |
|
"eval_samples_per_second": 0.875, |
|
"eval_steps_per_second": 0.438, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 7.22, |
|
"learning_rate": 0.0001415478615071283, |
|
"loss": 1.0231, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"learning_rate": 0.00014052953156822812, |
|
"loss": 1.043, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 7.26, |
|
"learning_rate": 0.0001395112016293279, |
|
"loss": 1.0423, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"learning_rate": 0.0001384928716904277, |
|
"loss": 1.1131, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 0.0001374745417515275, |
|
"loss": 1.0106, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 7.32, |
|
"learning_rate": 0.0001364562118126273, |
|
"loss": 1.0663, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"learning_rate": 0.0001354378818737271, |
|
"loss": 1.0842, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 7.36, |
|
"learning_rate": 0.00013441955193482688, |
|
"loss": 1.0482, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 7.38, |
|
"learning_rate": 0.0001334012219959267, |
|
"loss": 1.1137, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"learning_rate": 0.00013238289205702648, |
|
"loss": 1.0737, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"eval_loss": 0.9295714497566223, |
|
"eval_rouge1": 0.35713241736503193, |
|
"eval_rouge2": 0.18064337174705725, |
|
"eval_rougeL": 0.2977802249700461, |
|
"eval_rougeLsum": 0.2990767538657372, |
|
"eval_runtime": 21.6594, |
|
"eval_samples_per_second": 0.923, |
|
"eval_steps_per_second": 0.462, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 7.42, |
|
"learning_rate": 0.00013136456211812627, |
|
"loss": 1.0793, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"learning_rate": 0.00013034623217922606, |
|
"loss": 1.0964, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 7.46, |
|
"learning_rate": 0.00012932790224032587, |
|
"loss": 1.0471, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"learning_rate": 0.00012830957230142569, |
|
"loss": 1.0647, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 0.00012729124236252545, |
|
"loss": 1.095, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 7.52, |
|
"learning_rate": 0.00012627291242362526, |
|
"loss": 1.0891, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 7.54, |
|
"learning_rate": 0.00012525458248472505, |
|
"loss": 1.0876, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 7.56, |
|
"learning_rate": 0.00012423625254582486, |
|
"loss": 1.0112, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"learning_rate": 0.00012321792260692465, |
|
"loss": 1.0352, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 0.00012219959266802444, |
|
"loss": 1.0549, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"eval_loss": 0.9341118931770325, |
|
"eval_rouge1": 0.35969569162284754, |
|
"eval_rouge2": 0.17023162775203884, |
|
"eval_rougeL": 0.28257423918684926, |
|
"eval_rougeLsum": 0.28412150462963986, |
|
"eval_runtime": 22.2811, |
|
"eval_samples_per_second": 0.898, |
|
"eval_steps_per_second": 0.449, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 7.62, |
|
"learning_rate": 0.00012118126272912423, |
|
"loss": 1.0636, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"learning_rate": 0.00012016293279022404, |
|
"loss": 1.0719, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 7.66, |
|
"learning_rate": 0.00011914460285132383, |
|
"loss": 1.0451, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"learning_rate": 0.00011812627291242363, |
|
"loss": 1.0509, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"learning_rate": 0.00011710794297352342, |
|
"loss": 1.0515, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 7.72, |
|
"learning_rate": 0.00011608961303462322, |
|
"loss": 1.0525, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"learning_rate": 0.00011507128309572301, |
|
"loss": 1.0751, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 7.76, |
|
"learning_rate": 0.00011405295315682282, |
|
"loss": 1.059, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 7.78, |
|
"learning_rate": 0.00011303462321792261, |
|
"loss": 1.0352, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"learning_rate": 0.00011201629327902241, |
|
"loss": 1.065, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"eval_loss": 0.9318963885307312, |
|
"eval_rouge1": 0.36719535582225143, |
|
"eval_rouge2": 0.18487793490577756, |
|
"eval_rougeL": 0.3063474590760422, |
|
"eval_rougeLsum": 0.3072518409290927, |
|
"eval_runtime": 22.5338, |
|
"eval_samples_per_second": 0.888, |
|
"eval_steps_per_second": 0.444, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 7.82, |
|
"learning_rate": 0.0001109979633401222, |
|
"loss": 1.0591, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"learning_rate": 0.000109979633401222, |
|
"loss": 1.0553, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 7.86, |
|
"learning_rate": 0.00010896130346232179, |
|
"loss": 1.0766, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"learning_rate": 0.0001079429735234216, |
|
"loss": 1.0434, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 7.9, |
|
"learning_rate": 0.00010692464358452139, |
|
"loss": 1.0453, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 7.92, |
|
"learning_rate": 0.00010590631364562118, |
|
"loss": 1.0754, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 7.94, |
|
"learning_rate": 0.00010488798370672098, |
|
"loss": 1.0754, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 7.96, |
|
"learning_rate": 0.00010386965376782077, |
|
"loss": 1.0743, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 7.98, |
|
"learning_rate": 0.00010285132382892057, |
|
"loss": 1.0792, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 0.00010183299389002037, |
|
"loss": 1.0676, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 0.9239376783370972, |
|
"eval_rouge1": 0.3708780006445588, |
|
"eval_rouge2": 0.18486714155230236, |
|
"eval_rougeL": 0.30909528682097154, |
|
"eval_rougeLsum": 0.31117436309534663, |
|
"eval_runtime": 22.2042, |
|
"eval_samples_per_second": 0.901, |
|
"eval_steps_per_second": 0.45, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 0.00010081466395112017, |
|
"loss": 1.0105, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"learning_rate": 9.979633401221996e-05, |
|
"loss": 1.0505, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 9.877800407331976e-05, |
|
"loss": 1.0375, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"learning_rate": 9.775967413441955e-05, |
|
"loss": 1.0465, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 9.674134419551935e-05, |
|
"loss": 1.0249, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"learning_rate": 9.572301425661914e-05, |
|
"loss": 1.0806, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 8.14, |
|
"learning_rate": 9.470468431771895e-05, |
|
"loss": 1.0233, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 8.16, |
|
"learning_rate": 9.368635437881874e-05, |
|
"loss": 1.0605, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 8.18, |
|
"learning_rate": 9.266802443991854e-05, |
|
"loss": 1.0336, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 9.164969450101833e-05, |
|
"loss": 1.0327, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"eval_loss": 0.9313555955886841, |
|
"eval_rouge1": 0.3612621102138621, |
|
"eval_rouge2": 0.17914853351838206, |
|
"eval_rougeL": 0.3058448839143484, |
|
"eval_rougeLsum": 0.30736619803392246, |
|
"eval_runtime": 21.8087, |
|
"eval_samples_per_second": 0.917, |
|
"eval_steps_per_second": 0.459, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"learning_rate": 9.063136456211812e-05, |
|
"loss": 1.0526, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 8.24, |
|
"learning_rate": 8.961303462321792e-05, |
|
"loss": 1.0063, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"learning_rate": 8.859470468431772e-05, |
|
"loss": 1.071, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 8.28, |
|
"learning_rate": 8.757637474541752e-05, |
|
"loss": 1.06, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"learning_rate": 8.655804480651731e-05, |
|
"loss": 1.016, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"learning_rate": 8.553971486761711e-05, |
|
"loss": 1.0764, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 8.34, |
|
"learning_rate": 8.45213849287169e-05, |
|
"loss": 1.0446, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 8.36, |
|
"learning_rate": 8.35030549898167e-05, |
|
"loss": 1.0479, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 8.38, |
|
"learning_rate": 8.24847250509165e-05, |
|
"loss": 1.0283, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 8.14663951120163e-05, |
|
"loss": 1.0532, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"eval_loss": 0.9309422373771667, |
|
"eval_rouge1": 0.3629265409740602, |
|
"eval_rouge2": 0.18256578670757062, |
|
"eval_rougeL": 0.30644935758402025, |
|
"eval_rougeLsum": 0.3074292385019225, |
|
"eval_runtime": 21.7991, |
|
"eval_samples_per_second": 0.917, |
|
"eval_steps_per_second": 0.459, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"learning_rate": 8.044806517311609e-05, |
|
"loss": 1.0408, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 8.44, |
|
"learning_rate": 7.942973523421589e-05, |
|
"loss": 1.0554, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 8.46, |
|
"learning_rate": 7.841140529531568e-05, |
|
"loss": 1.086, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"learning_rate": 7.739307535641548e-05, |
|
"loss": 1.0316, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"learning_rate": 7.637474541751528e-05, |
|
"loss": 1.052, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 8.52, |
|
"learning_rate": 7.535641547861507e-05, |
|
"loss": 1.0419, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 8.54, |
|
"learning_rate": 7.433808553971487e-05, |
|
"loss": 1.0657, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"learning_rate": 7.331975560081466e-05, |
|
"loss": 1.03, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 8.58, |
|
"learning_rate": 7.230142566191446e-05, |
|
"loss": 1.042, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"learning_rate": 7.128309572301425e-05, |
|
"loss": 1.0649, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"eval_loss": 0.9269277453422546, |
|
"eval_rouge1": 0.3670666365189951, |
|
"eval_rouge2": 0.18356209763485815, |
|
"eval_rougeL": 0.3012090907355077, |
|
"eval_rougeLsum": 0.3039222282541081, |
|
"eval_runtime": 22.1694, |
|
"eval_samples_per_second": 0.902, |
|
"eval_steps_per_second": 0.451, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"learning_rate": 7.026476578411406e-05, |
|
"loss": 1.0671, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"learning_rate": 6.924643584521385e-05, |
|
"loss": 1.0237, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 8.66, |
|
"learning_rate": 6.822810590631365e-05, |
|
"loss": 1.0582, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 8.68, |
|
"learning_rate": 6.720977596741344e-05, |
|
"loss": 1.0325, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 6.619144602851324e-05, |
|
"loss": 1.0474, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 8.72, |
|
"learning_rate": 6.517311608961303e-05, |
|
"loss": 1.0346, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 8.74, |
|
"learning_rate": 6.415478615071284e-05, |
|
"loss": 1.063, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 8.76, |
|
"learning_rate": 6.313645621181263e-05, |
|
"loss": 1.0337, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 8.78, |
|
"learning_rate": 6.211812627291243e-05, |
|
"loss": 1.0531, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 6.109979633401222e-05, |
|
"loss": 1.073, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"eval_loss": 0.9253420829772949, |
|
"eval_rouge1": 0.3653533114499009, |
|
"eval_rouge2": 0.17889561755188924, |
|
"eval_rougeL": 0.30070525024462247, |
|
"eval_rougeLsum": 0.3024312386336895, |
|
"eval_runtime": 21.8579, |
|
"eval_samples_per_second": 0.915, |
|
"eval_steps_per_second": 0.458, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 8.82, |
|
"learning_rate": 6.008146639511202e-05, |
|
"loss": 1.0716, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 8.84, |
|
"learning_rate": 5.9063136456211815e-05, |
|
"loss": 1.0522, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"learning_rate": 5.804480651731161e-05, |
|
"loss": 1.0401, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 8.88, |
|
"learning_rate": 5.702647657841141e-05, |
|
"loss": 1.0178, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"learning_rate": 5.6008146639511206e-05, |
|
"loss": 1.1043, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 8.92, |
|
"learning_rate": 5.4989816700611e-05, |
|
"loss": 1.0424, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 8.94, |
|
"learning_rate": 5.39714867617108e-05, |
|
"loss": 1.0599, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"learning_rate": 5.295315682281059e-05, |
|
"loss": 1.0331, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 8.98, |
|
"learning_rate": 5.1934826883910384e-05, |
|
"loss": 1.0467, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 5.0916496945010185e-05, |
|
"loss": 1.0491, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 0.9241307973861694, |
|
"eval_rouge1": 0.3664328537702999, |
|
"eval_rouge2": 0.18102691382663055, |
|
"eval_rougeL": 0.2998899427822319, |
|
"eval_rougeLsum": 0.301943630367904, |
|
"eval_runtime": 22.1092, |
|
"eval_samples_per_second": 0.905, |
|
"eval_steps_per_second": 0.452, |
|
"step": 4500 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 5000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 3.0455982194688e+17, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|