{ "best_metric": 0.992469545957918, "best_model_checkpoint": "swinv2-large-patch4-window12to16-192to256-22kto1k-ft-finetuned-LungCancer-LC25000-AH-40-30-30-3/checkpoint-1309", "epoch": 6.990654205607477, "eval_steps": 500, "global_step": 1309, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 3.816793893129771e-06, "loss": 1.1292, "step": 5 }, { "epoch": 0.05, "learning_rate": 7.633587786259541e-06, "loss": 0.9838, "step": 10 }, { "epoch": 0.08, "learning_rate": 1.1450381679389314e-05, "loss": 0.689, "step": 15 }, { "epoch": 0.11, "learning_rate": 1.5267175572519083e-05, "loss": 0.4829, "step": 20 }, { "epoch": 0.13, "learning_rate": 1.9083969465648855e-05, "loss": 0.3431, "step": 25 }, { "epoch": 0.16, "learning_rate": 2.2900763358778628e-05, "loss": 0.3189, "step": 30 }, { "epoch": 0.19, "learning_rate": 2.6717557251908397e-05, "loss": 0.1807, "step": 35 }, { "epoch": 0.21, "learning_rate": 3.0534351145038166e-05, "loss": 0.3006, "step": 40 }, { "epoch": 0.24, "learning_rate": 3.435114503816794e-05, "loss": 0.3244, "step": 45 }, { "epoch": 0.27, "learning_rate": 3.816793893129771e-05, "loss": 0.2128, "step": 50 }, { "epoch": 0.29, "learning_rate": 4.198473282442748e-05, "loss": 0.2781, "step": 55 }, { "epoch": 0.32, "learning_rate": 4.5801526717557256e-05, "loss": 0.181, "step": 60 }, { "epoch": 0.35, "learning_rate": 4.9618320610687025e-05, "loss": 0.1957, "step": 65 }, { "epoch": 0.37, "learning_rate": 5.3435114503816794e-05, "loss": 0.1996, "step": 70 }, { "epoch": 0.4, "learning_rate": 5.725190839694656e-05, "loss": 0.1654, "step": 75 }, { "epoch": 0.43, "learning_rate": 6.106870229007633e-05, "loss": 0.1993, "step": 80 }, { "epoch": 0.45, "learning_rate": 6.488549618320611e-05, "loss": 0.1601, "step": 85 }, { "epoch": 0.48, "learning_rate": 6.870229007633588e-05, "loss": 0.2335, "step": 90 }, { "epoch": 0.51, "learning_rate": 7.251908396946565e-05, "loss": 0.1595, "step": 95 }, { "epoch": 0.53, "learning_rate": 7.633587786259542e-05, "loss": 0.3264, "step": 100 }, { "epoch": 0.56, "learning_rate": 8.015267175572518e-05, "loss": 0.261, "step": 105 }, { "epoch": 0.59, "learning_rate": 8.396946564885496e-05, "loss": 0.1172, "step": 110 }, { "epoch": 0.61, "learning_rate": 8.778625954198472e-05, "loss": 0.2057, "step": 115 }, { "epoch": 0.64, "learning_rate": 9.160305343511451e-05, "loss": 0.4153, "step": 120 }, { "epoch": 0.67, "learning_rate": 9.541984732824429e-05, "loss": 0.4453, "step": 125 }, { "epoch": 0.69, "learning_rate": 9.923664122137405e-05, "loss": 0.2069, "step": 130 }, { "epoch": 0.72, "learning_rate": 0.00010305343511450383, "loss": 0.1238, "step": 135 }, { "epoch": 0.75, "learning_rate": 0.00010687022900763359, "loss": 0.3154, "step": 140 }, { "epoch": 0.77, "learning_rate": 0.00011068702290076336, "loss": 0.2941, "step": 145 }, { "epoch": 0.8, "learning_rate": 0.00011450381679389313, "loss": 0.3083, "step": 150 }, { "epoch": 0.83, "learning_rate": 0.0001183206106870229, "loss": 0.3601, "step": 155 }, { "epoch": 0.85, "learning_rate": 0.00012213740458015266, "loss": 0.3151, "step": 160 }, { "epoch": 0.88, "learning_rate": 0.00012595419847328244, "loss": 0.284, "step": 165 }, { "epoch": 0.91, "learning_rate": 0.00012977099236641222, "loss": 0.3097, "step": 170 }, { "epoch": 0.93, "learning_rate": 0.000133587786259542, "loss": 0.2929, "step": 175 }, { "epoch": 0.96, "learning_rate": 0.00013740458015267177, "loss": 0.1706, "step": 180 }, { "epoch": 0.99, "learning_rate": 0.00014122137404580154, "loss": 0.2297, "step": 185 }, { "epoch": 1.0, "eval_accuracy": 0.9231450719822812, "eval_loss": 0.3400990664958954, "eval_runtime": 2470.4126, "eval_samples_per_second": 1.828, "eval_steps_per_second": 0.229, "step": 187 }, { "epoch": 1.01, "learning_rate": 0.0001450381679389313, "loss": 0.3795, "step": 190 }, { "epoch": 1.04, "learning_rate": 0.00014885496183206107, "loss": 0.8246, "step": 195 }, { "epoch": 1.07, "learning_rate": 0.00015267175572519084, "loss": 0.4403, "step": 200 }, { "epoch": 1.09, "learning_rate": 0.00015648854961832062, "loss": 0.1943, "step": 205 }, { "epoch": 1.12, "learning_rate": 0.00016030534351145037, "loss": 0.3229, "step": 210 }, { "epoch": 1.15, "learning_rate": 0.00016412213740458014, "loss": 0.262, "step": 215 }, { "epoch": 1.17, "learning_rate": 0.00016793893129770992, "loss": 0.2737, "step": 220 }, { "epoch": 1.2, "learning_rate": 0.0001717557251908397, "loss": 0.1592, "step": 225 }, { "epoch": 1.23, "learning_rate": 0.00017557251908396944, "loss": 0.253, "step": 230 }, { "epoch": 1.26, "learning_rate": 0.00017938931297709925, "loss": 0.2214, "step": 235 }, { "epoch": 1.28, "learning_rate": 0.00018320610687022902, "loss": 0.3303, "step": 240 }, { "epoch": 1.31, "learning_rate": 0.0001870229007633588, "loss": 0.3262, "step": 245 }, { "epoch": 1.34, "learning_rate": 0.00019083969465648857, "loss": 0.2077, "step": 250 }, { "epoch": 1.36, "learning_rate": 0.00019465648854961832, "loss": 0.2725, "step": 255 }, { "epoch": 1.39, "learning_rate": 0.0001984732824427481, "loss": 0.1802, "step": 260 }, { "epoch": 1.42, "learning_rate": 0.00020229007633587788, "loss": 0.2252, "step": 265 }, { "epoch": 1.44, "learning_rate": 0.00020610687022900765, "loss": 0.2548, "step": 270 }, { "epoch": 1.47, "learning_rate": 0.0002099236641221374, "loss": 0.2303, "step": 275 }, { "epoch": 1.5, "learning_rate": 0.00021374045801526718, "loss": 0.5035, "step": 280 }, { "epoch": 1.52, "learning_rate": 0.00021755725190839695, "loss": 0.4469, "step": 285 }, { "epoch": 1.55, "learning_rate": 0.00022137404580152673, "loss": 0.3752, "step": 290 }, { "epoch": 1.58, "learning_rate": 0.00022519083969465648, "loss": 0.1244, "step": 295 }, { "epoch": 1.6, "learning_rate": 0.00022900763358778625, "loss": 0.1889, "step": 300 }, { "epoch": 1.63, "learning_rate": 0.00023282442748091603, "loss": 0.1583, "step": 305 }, { "epoch": 1.66, "learning_rate": 0.0002366412213740458, "loss": 0.2542, "step": 310 }, { "epoch": 1.68, "learning_rate": 0.00024045801526717558, "loss": 0.2861, "step": 315 }, { "epoch": 1.71, "learning_rate": 0.00024427480916030533, "loss": 0.1671, "step": 320 }, { "epoch": 1.74, "learning_rate": 0.00024809160305343513, "loss": 0.2157, "step": 325 }, { "epoch": 1.76, "learning_rate": 0.0002519083969465649, "loss": 0.2092, "step": 330 }, { "epoch": 1.79, "learning_rate": 0.00025572519083969463, "loss": 0.172, "step": 335 }, { "epoch": 1.82, "learning_rate": 0.00025954198473282443, "loss": 0.4118, "step": 340 }, { "epoch": 1.84, "learning_rate": 0.0002633587786259542, "loss": 0.2941, "step": 345 }, { "epoch": 1.87, "learning_rate": 0.000267175572519084, "loss": 0.1484, "step": 350 }, { "epoch": 1.9, "learning_rate": 0.00027099236641221373, "loss": 0.287, "step": 355 }, { "epoch": 1.92, "learning_rate": 0.00027480916030534353, "loss": 0.279, "step": 360 }, { "epoch": 1.95, "learning_rate": 0.0002786259541984733, "loss": 0.2117, "step": 365 }, { "epoch": 1.98, "learning_rate": 0.0002824427480916031, "loss": 0.3255, "step": 370 }, { "epoch": 2.0, "eval_accuracy": 0.9643410852713178, "eval_loss": 0.1010039672255516, "eval_runtime": 193.6164, "eval_samples_per_second": 23.319, "eval_steps_per_second": 2.918, "step": 374 }, { "epoch": 2.0, "learning_rate": 0.0002862595419847328, "loss": 0.193, "step": 375 }, { "epoch": 2.03, "learning_rate": 0.0002900763358778626, "loss": 0.3363, "step": 380 }, { "epoch": 2.06, "learning_rate": 0.0002938931297709924, "loss": 0.2783, "step": 385 }, { "epoch": 2.08, "learning_rate": 0.00029770992366412214, "loss": 0.1567, "step": 390 }, { "epoch": 2.11, "learning_rate": 0.00030152671755725194, "loss": 0.3207, "step": 395 }, { "epoch": 2.14, "learning_rate": 0.0003053435114503817, "loss": 0.2227, "step": 400 }, { "epoch": 2.16, "learning_rate": 0.0003091603053435115, "loss": 0.2319, "step": 405 }, { "epoch": 2.19, "learning_rate": 0.00031297709923664124, "loss": 0.2516, "step": 410 }, { "epoch": 2.22, "learning_rate": 0.000316793893129771, "loss": 0.198, "step": 415 }, { "epoch": 2.24, "learning_rate": 0.00032061068702290074, "loss": 0.3153, "step": 420 }, { "epoch": 2.27, "learning_rate": 0.00032442748091603054, "loss": 0.3282, "step": 425 }, { "epoch": 2.3, "learning_rate": 0.0003282442748091603, "loss": 0.305, "step": 430 }, { "epoch": 2.32, "learning_rate": 0.0003320610687022901, "loss": 0.4627, "step": 435 }, { "epoch": 2.35, "learning_rate": 0.00033587786259541984, "loss": 0.2607, "step": 440 }, { "epoch": 2.38, "learning_rate": 0.00033969465648854964, "loss": 0.235, "step": 445 }, { "epoch": 2.4, "learning_rate": 0.0003435114503816794, "loss": 0.2973, "step": 450 }, { "epoch": 2.43, "learning_rate": 0.0003473282442748092, "loss": 0.3968, "step": 455 }, { "epoch": 2.46, "learning_rate": 0.0003511450381679389, "loss": 0.2779, "step": 460 }, { "epoch": 2.48, "learning_rate": 0.0003549618320610687, "loss": 0.242, "step": 465 }, { "epoch": 2.51, "learning_rate": 0.0003587786259541985, "loss": 0.1816, "step": 470 }, { "epoch": 2.54, "learning_rate": 0.00036259541984732824, "loss": 0.2947, "step": 475 }, { "epoch": 2.56, "learning_rate": 0.00036641221374045805, "loss": 0.2522, "step": 480 }, { "epoch": 2.59, "learning_rate": 0.0003702290076335878, "loss": 0.4026, "step": 485 }, { "epoch": 2.62, "learning_rate": 0.0003740458015267176, "loss": 0.3203, "step": 490 }, { "epoch": 2.64, "learning_rate": 0.00037786259541984735, "loss": 0.1702, "step": 495 }, { "epoch": 2.67, "learning_rate": 0.00038167938931297715, "loss": 0.2985, "step": 500 }, { "epoch": 2.7, "learning_rate": 0.00038549618320610684, "loss": 0.3186, "step": 505 }, { "epoch": 2.72, "learning_rate": 0.00038931297709923665, "loss": 0.1724, "step": 510 }, { "epoch": 2.75, "learning_rate": 0.0003931297709923664, "loss": 0.3203, "step": 515 }, { "epoch": 2.78, "learning_rate": 0.0003969465648854962, "loss": 0.4068, "step": 520 }, { "epoch": 2.8, "learning_rate": 0.00040076335877862595, "loss": 0.247, "step": 525 }, { "epoch": 2.83, "learning_rate": 0.00040458015267175575, "loss": 0.2256, "step": 530 }, { "epoch": 2.86, "learning_rate": 0.0004083969465648855, "loss": 0.2111, "step": 535 }, { "epoch": 2.88, "learning_rate": 0.0004122137404580153, "loss": 0.2228, "step": 540 }, { "epoch": 2.91, "learning_rate": 0.00041603053435114505, "loss": 0.1618, "step": 545 }, { "epoch": 2.94, "learning_rate": 0.0004198473282442748, "loss": 0.1953, "step": 550 }, { "epoch": 2.96, "learning_rate": 0.00042366412213740455, "loss": 0.4014, "step": 555 }, { "epoch": 2.99, "learning_rate": 0.00042748091603053435, "loss": 0.4962, "step": 560 }, { "epoch": 3.0, "eval_accuracy": 0.9607973421926911, "eval_loss": 0.09668433666229248, "eval_runtime": 193.2648, "eval_samples_per_second": 23.362, "eval_steps_per_second": 2.923, "step": 561 }, { "epoch": 3.02, "learning_rate": 0.00043129770992366415, "loss": 0.1603, "step": 565 }, { "epoch": 3.04, "learning_rate": 0.0004351145038167939, "loss": 0.1989, "step": 570 }, { "epoch": 3.07, "learning_rate": 0.0004389312977099237, "loss": 0.126, "step": 575 }, { "epoch": 3.1, "learning_rate": 0.00044274809160305345, "loss": 0.4359, "step": 580 }, { "epoch": 3.12, "learning_rate": 0.00044656488549618326, "loss": 0.2178, "step": 585 }, { "epoch": 3.15, "learning_rate": 0.00045038167938931295, "loss": 0.2256, "step": 590 }, { "epoch": 3.18, "learning_rate": 0.00045419847328244275, "loss": 0.2226, "step": 595 }, { "epoch": 3.2, "learning_rate": 0.0004580152671755725, "loss": 0.2424, "step": 600 }, { "epoch": 3.23, "learning_rate": 0.0004618320610687023, "loss": 0.2644, "step": 605 }, { "epoch": 3.26, "learning_rate": 0.00046564885496183206, "loss": 0.2048, "step": 610 }, { "epoch": 3.28, "learning_rate": 0.00046946564885496186, "loss": 0.2766, "step": 615 }, { "epoch": 3.31, "learning_rate": 0.0004732824427480916, "loss": 0.2327, "step": 620 }, { "epoch": 3.34, "learning_rate": 0.0004770992366412214, "loss": 0.2662, "step": 625 }, { "epoch": 3.36, "learning_rate": 0.00048091603053435116, "loss": 0.1448, "step": 630 }, { "epoch": 3.39, "learning_rate": 0.0004847328244274809, "loss": 0.2287, "step": 635 }, { "epoch": 3.42, "learning_rate": 0.0004885496183206107, "loss": 0.1811, "step": 640 }, { "epoch": 3.44, "learning_rate": 0.0004923664122137404, "loss": 0.2301, "step": 645 }, { "epoch": 3.47, "learning_rate": 0.0004961832061068703, "loss": 0.2068, "step": 650 }, { "epoch": 3.5, "learning_rate": 0.0005, "loss": 0.1134, "step": 655 }, { "epoch": 3.52, "learning_rate": 0.000496177370030581, "loss": 0.215, "step": 660 }, { "epoch": 3.55, "learning_rate": 0.0004923547400611621, "loss": 0.2762, "step": 665 }, { "epoch": 3.58, "learning_rate": 0.0004885321100917432, "loss": 0.2555, "step": 670 }, { "epoch": 3.6, "learning_rate": 0.00048470948012232416, "loss": 0.3345, "step": 675 }, { "epoch": 3.63, "learning_rate": 0.00048088685015290524, "loss": 0.1561, "step": 680 }, { "epoch": 3.66, "learning_rate": 0.00047706422018348627, "loss": 0.1449, "step": 685 }, { "epoch": 3.68, "learning_rate": 0.00047324159021406724, "loss": 0.3557, "step": 690 }, { "epoch": 3.71, "learning_rate": 0.0004694189602446483, "loss": 0.5036, "step": 695 }, { "epoch": 3.74, "learning_rate": 0.00046559633027522934, "loss": 0.1832, "step": 700 }, { "epoch": 3.77, "learning_rate": 0.00046177370030581037, "loss": 0.4736, "step": 705 }, { "epoch": 3.79, "learning_rate": 0.00045795107033639145, "loss": 0.2116, "step": 710 }, { "epoch": 3.82, "learning_rate": 0.0004541284403669725, "loss": 0.1522, "step": 715 }, { "epoch": 3.85, "learning_rate": 0.00045030581039755355, "loss": 0.1374, "step": 720 }, { "epoch": 3.87, "learning_rate": 0.0004464831804281346, "loss": 0.2018, "step": 725 }, { "epoch": 3.9, "learning_rate": 0.0004426605504587156, "loss": 0.1472, "step": 730 }, { "epoch": 3.93, "learning_rate": 0.0004388379204892967, "loss": 0.2752, "step": 735 }, { "epoch": 3.95, "learning_rate": 0.0004350152905198777, "loss": 0.2655, "step": 740 }, { "epoch": 3.98, "learning_rate": 0.00043119266055045873, "loss": 0.181, "step": 745 }, { "epoch": 4.0, "eval_accuracy": 0.9295681063122924, "eval_loss": 0.16242608428001404, "eval_runtime": 193.174, "eval_samples_per_second": 23.373, "eval_steps_per_second": 2.925, "step": 749 }, { "epoch": 4.01, "learning_rate": 0.00042737003058103976, "loss": 0.1413, "step": 750 }, { "epoch": 4.03, "learning_rate": 0.0004235474006116208, "loss": 0.1415, "step": 755 }, { "epoch": 4.06, "learning_rate": 0.0004197247706422018, "loss": 0.1483, "step": 760 }, { "epoch": 4.09, "learning_rate": 0.0004159021406727829, "loss": 0.2537, "step": 765 }, { "epoch": 4.11, "learning_rate": 0.0004120795107033639, "loss": 0.2672, "step": 770 }, { "epoch": 4.14, "learning_rate": 0.00040825688073394494, "loss": 0.1963, "step": 775 }, { "epoch": 4.17, "learning_rate": 0.000404434250764526, "loss": 0.1984, "step": 780 }, { "epoch": 4.19, "learning_rate": 0.00040061162079510704, "loss": 0.171, "step": 785 }, { "epoch": 4.22, "learning_rate": 0.00039678899082568807, "loss": 0.2535, "step": 790 }, { "epoch": 4.25, "learning_rate": 0.00039296636085626915, "loss": 0.2084, "step": 795 }, { "epoch": 4.27, "learning_rate": 0.00038914373088685017, "loss": 0.122, "step": 800 }, { "epoch": 4.3, "learning_rate": 0.0003853211009174312, "loss": 0.1591, "step": 805 }, { "epoch": 4.33, "learning_rate": 0.0003814984709480123, "loss": 0.2064, "step": 810 }, { "epoch": 4.35, "learning_rate": 0.00037767584097859325, "loss": 0.1932, "step": 815 }, { "epoch": 4.38, "learning_rate": 0.00037385321100917427, "loss": 0.1794, "step": 820 }, { "epoch": 4.41, "learning_rate": 0.00037003058103975535, "loss": 0.1727, "step": 825 }, { "epoch": 4.43, "learning_rate": 0.0003662079510703364, "loss": 0.0987, "step": 830 }, { "epoch": 4.46, "learning_rate": 0.00036238532110091746, "loss": 0.1894, "step": 835 }, { "epoch": 4.49, "learning_rate": 0.0003585626911314985, "loss": 0.1725, "step": 840 }, { "epoch": 4.51, "learning_rate": 0.0003547400611620795, "loss": 0.1334, "step": 845 }, { "epoch": 4.54, "learning_rate": 0.0003509174311926606, "loss": 0.1386, "step": 850 }, { "epoch": 4.57, "learning_rate": 0.0003470948012232416, "loss": 0.2012, "step": 855 }, { "epoch": 4.59, "learning_rate": 0.00034327217125382264, "loss": 0.4956, "step": 860 }, { "epoch": 4.62, "learning_rate": 0.0003394495412844037, "loss": 0.16, "step": 865 }, { "epoch": 4.65, "learning_rate": 0.00033562691131498474, "loss": 0.2497, "step": 870 }, { "epoch": 4.67, "learning_rate": 0.00033180428134556576, "loss": 0.2201, "step": 875 }, { "epoch": 4.7, "learning_rate": 0.0003279816513761468, "loss": 0.1424, "step": 880 }, { "epoch": 4.73, "learning_rate": 0.0003241590214067278, "loss": 0.1505, "step": 885 }, { "epoch": 4.75, "learning_rate": 0.00032033639143730884, "loss": 0.1896, "step": 890 }, { "epoch": 4.78, "learning_rate": 0.0003165137614678899, "loss": 0.1165, "step": 895 }, { "epoch": 4.81, "learning_rate": 0.00031269113149847094, "loss": 0.0779, "step": 900 }, { "epoch": 4.83, "learning_rate": 0.00030886850152905197, "loss": 0.1667, "step": 905 }, { "epoch": 4.86, "learning_rate": 0.00030504587155963305, "loss": 0.103, "step": 910 }, { "epoch": 4.89, "learning_rate": 0.0003012232415902141, "loss": 0.2255, "step": 915 }, { "epoch": 4.91, "learning_rate": 0.0002974006116207951, "loss": 0.2617, "step": 920 }, { "epoch": 4.94, "learning_rate": 0.0002935779816513762, "loss": 0.2884, "step": 925 }, { "epoch": 4.97, "learning_rate": 0.0002897553516819572, "loss": 0.1863, "step": 930 }, { "epoch": 4.99, "learning_rate": 0.0002859327217125383, "loss": 0.2266, "step": 935 }, { "epoch": 5.0, "eval_accuracy": 0.9614617940199336, "eval_loss": 0.09752781689167023, "eval_runtime": 193.4672, "eval_samples_per_second": 23.337, "eval_steps_per_second": 2.92, "step": 936 }, { "epoch": 5.02, "learning_rate": 0.00028211009174311925, "loss": 0.1735, "step": 940 }, { "epoch": 5.05, "learning_rate": 0.0002782874617737003, "loss": 0.1106, "step": 945 }, { "epoch": 5.07, "learning_rate": 0.00027446483180428136, "loss": 0.1575, "step": 950 }, { "epoch": 5.1, "learning_rate": 0.0002706422018348624, "loss": 0.1547, "step": 955 }, { "epoch": 5.13, "learning_rate": 0.0002668195718654434, "loss": 0.1507, "step": 960 }, { "epoch": 5.15, "learning_rate": 0.0002629969418960245, "loss": 0.1887, "step": 965 }, { "epoch": 5.18, "learning_rate": 0.0002591743119266055, "loss": 0.1534, "step": 970 }, { "epoch": 5.21, "learning_rate": 0.00025535168195718654, "loss": 0.0758, "step": 975 }, { "epoch": 5.23, "learning_rate": 0.0002515290519877676, "loss": 0.0655, "step": 980 }, { "epoch": 5.26, "learning_rate": 0.00024770642201834864, "loss": 0.1125, "step": 985 }, { "epoch": 5.29, "learning_rate": 0.00024388379204892967, "loss": 0.1932, "step": 990 }, { "epoch": 5.31, "learning_rate": 0.0002400611620795107, "loss": 0.1113, "step": 995 }, { "epoch": 5.34, "learning_rate": 0.00023623853211009174, "loss": 0.0912, "step": 1000 }, { "epoch": 5.37, "learning_rate": 0.0002324159021406728, "loss": 0.1605, "step": 1005 }, { "epoch": 5.39, "learning_rate": 0.00022859327217125385, "loss": 0.1793, "step": 1010 }, { "epoch": 5.42, "learning_rate": 0.00022477064220183487, "loss": 0.0705, "step": 1015 }, { "epoch": 5.45, "learning_rate": 0.0002209480122324159, "loss": 0.0778, "step": 1020 }, { "epoch": 5.47, "learning_rate": 0.00021712538226299695, "loss": 0.1484, "step": 1025 }, { "epoch": 5.5, "learning_rate": 0.00021330275229357798, "loss": 0.0914, "step": 1030 }, { "epoch": 5.53, "learning_rate": 0.00020948012232415903, "loss": 0.0886, "step": 1035 }, { "epoch": 5.55, "learning_rate": 0.00020565749235474008, "loss": 0.1647, "step": 1040 }, { "epoch": 5.58, "learning_rate": 0.0002018348623853211, "loss": 0.0911, "step": 1045 }, { "epoch": 5.61, "learning_rate": 0.00019801223241590213, "loss": 0.0603, "step": 1050 }, { "epoch": 5.63, "learning_rate": 0.00019418960244648318, "loss": 0.097, "step": 1055 }, { "epoch": 5.66, "learning_rate": 0.0001903669724770642, "loss": 0.1378, "step": 1060 }, { "epoch": 5.69, "learning_rate": 0.00018654434250764526, "loss": 0.1187, "step": 1065 }, { "epoch": 5.71, "learning_rate": 0.0001827217125382263, "loss": 0.1202, "step": 1070 }, { "epoch": 5.74, "learning_rate": 0.00017889908256880736, "loss": 0.1565, "step": 1075 }, { "epoch": 5.77, "learning_rate": 0.00017507645259938836, "loss": 0.0996, "step": 1080 }, { "epoch": 5.79, "learning_rate": 0.00017125382262996941, "loss": 0.0821, "step": 1085 }, { "epoch": 5.82, "learning_rate": 0.00016743119266055047, "loss": 0.084, "step": 1090 }, { "epoch": 5.85, "learning_rate": 0.0001636085626911315, "loss": 0.1211, "step": 1095 }, { "epoch": 5.87, "learning_rate": 0.00015978593272171254, "loss": 0.1592, "step": 1100 }, { "epoch": 5.9, "learning_rate": 0.0001559633027522936, "loss": 0.1067, "step": 1105 }, { "epoch": 5.93, "learning_rate": 0.00015214067278287462, "loss": 0.111, "step": 1110 }, { "epoch": 5.95, "learning_rate": 0.00014831804281345565, "loss": 0.0776, "step": 1115 }, { "epoch": 5.98, "learning_rate": 0.0001444954128440367, "loss": 0.0981, "step": 1120 }, { "epoch": 6.0, "eval_accuracy": 0.9782945736434109, "eval_loss": 0.05765698477625847, "eval_runtime": 193.5485, "eval_samples_per_second": 23.327, "eval_steps_per_second": 2.919, "step": 1123 }, { "epoch": 6.01, "learning_rate": 0.00014067278287461775, "loss": 0.1255, "step": 1125 }, { "epoch": 6.03, "learning_rate": 0.00013685015290519878, "loss": 0.093, "step": 1130 }, { "epoch": 6.06, "learning_rate": 0.00013302752293577983, "loss": 0.1009, "step": 1135 }, { "epoch": 6.09, "learning_rate": 0.00012920489296636088, "loss": 0.0498, "step": 1140 }, { "epoch": 6.11, "learning_rate": 0.00012538226299694188, "loss": 0.0594, "step": 1145 }, { "epoch": 6.14, "learning_rate": 0.00012155963302752294, "loss": 0.0882, "step": 1150 }, { "epoch": 6.17, "learning_rate": 0.00011773700305810397, "loss": 0.3622, "step": 1155 }, { "epoch": 6.19, "learning_rate": 0.00011391437308868502, "loss": 0.0536, "step": 1160 }, { "epoch": 6.22, "learning_rate": 0.00011009174311926606, "loss": 0.0691, "step": 1165 }, { "epoch": 6.25, "learning_rate": 0.0001062691131498471, "loss": 0.0965, "step": 1170 }, { "epoch": 6.28, "learning_rate": 0.00010244648318042814, "loss": 0.0614, "step": 1175 }, { "epoch": 6.3, "learning_rate": 9.862385321100918e-05, "loss": 0.041, "step": 1180 }, { "epoch": 6.33, "learning_rate": 9.480122324159021e-05, "loss": 0.0135, "step": 1185 }, { "epoch": 6.36, "learning_rate": 9.097859327217125e-05, "loss": 0.0938, "step": 1190 }, { "epoch": 6.38, "learning_rate": 8.71559633027523e-05, "loss": 0.1184, "step": 1195 }, { "epoch": 6.41, "learning_rate": 8.333333333333333e-05, "loss": 0.1004, "step": 1200 }, { "epoch": 6.44, "learning_rate": 7.951070336391437e-05, "loss": 0.0319, "step": 1205 }, { "epoch": 6.46, "learning_rate": 7.568807339449542e-05, "loss": 0.1098, "step": 1210 }, { "epoch": 6.49, "learning_rate": 7.186544342507645e-05, "loss": 0.1152, "step": 1215 }, { "epoch": 6.52, "learning_rate": 6.80428134556575e-05, "loss": 0.077, "step": 1220 }, { "epoch": 6.54, "learning_rate": 6.422018348623854e-05, "loss": 0.0487, "step": 1225 }, { "epoch": 6.57, "learning_rate": 6.0397553516819576e-05, "loss": 0.0873, "step": 1230 }, { "epoch": 6.6, "learning_rate": 5.6574923547400615e-05, "loss": 0.0838, "step": 1235 }, { "epoch": 6.62, "learning_rate": 5.275229357798165e-05, "loss": 0.0458, "step": 1240 }, { "epoch": 6.65, "learning_rate": 4.892966360856269e-05, "loss": 0.1171, "step": 1245 }, { "epoch": 6.68, "learning_rate": 4.510703363914373e-05, "loss": 0.0412, "step": 1250 }, { "epoch": 6.7, "learning_rate": 4.1284403669724776e-05, "loss": 0.0484, "step": 1255 }, { "epoch": 6.73, "learning_rate": 3.746177370030581e-05, "loss": 0.1255, "step": 1260 }, { "epoch": 6.76, "learning_rate": 3.3639143730886846e-05, "loss": 0.1212, "step": 1265 }, { "epoch": 6.78, "learning_rate": 2.9816513761467892e-05, "loss": 0.0265, "step": 1270 }, { "epoch": 6.81, "learning_rate": 2.599388379204893e-05, "loss": 0.0676, "step": 1275 }, { "epoch": 6.84, "learning_rate": 2.217125382262997e-05, "loss": 0.0216, "step": 1280 }, { "epoch": 6.86, "learning_rate": 1.834862385321101e-05, "loss": 0.0113, "step": 1285 }, { "epoch": 6.89, "learning_rate": 1.452599388379205e-05, "loss": 0.0254, "step": 1290 }, { "epoch": 6.92, "learning_rate": 1.0703363914373088e-05, "loss": 0.0666, "step": 1295 }, { "epoch": 6.94, "learning_rate": 6.880733944954129e-06, "loss": 0.0762, "step": 1300 }, { "epoch": 6.97, "learning_rate": 3.0581039755351682e-06, "loss": 0.0629, "step": 1305 }, { "epoch": 6.99, "eval_accuracy": 0.992469545957918, "eval_loss": 0.0190800242125988, "eval_runtime": 193.283, "eval_samples_per_second": 23.36, "eval_steps_per_second": 2.923, "step": 1309 }, { "epoch": 6.99, "step": 1309, "total_flos": 9.636137349860819e+18, "train_loss": 0.2139335812344817, "train_runtime": 11717.3663, "train_samples_per_second": 3.575, "train_steps_per_second": 0.112 } ], "logging_steps": 5, "max_steps": 1309, "num_train_epochs": 7, "save_steps": 500, "total_flos": 9.636137349860819e+18, "trial_name": null, "trial_params": null }