{ "best_metric": 0.8735, "best_model_checkpoint": "swin-tiny-finetuned-cifar100/checkpoint-3905", "epoch": 4.99968, "global_step": 3905, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 5.115089514066497e-07, "loss": 4.6445, "step": 5 }, { "epoch": 0.01, "learning_rate": 1.0230179028132994e-06, "loss": 4.6363, "step": 10 }, { "epoch": 0.02, "learning_rate": 1.534526854219949e-06, "loss": 4.6361, "step": 15 }, { "epoch": 0.03, "learning_rate": 2.0460358056265987e-06, "loss": 4.6511, "step": 20 }, { "epoch": 0.03, "learning_rate": 2.5575447570332483e-06, "loss": 4.6596, "step": 25 }, { "epoch": 0.04, "learning_rate": 3.069053708439898e-06, "loss": 4.6345, "step": 30 }, { "epoch": 0.04, "learning_rate": 3.5805626598465474e-06, "loss": 4.6177, "step": 35 }, { "epoch": 0.05, "learning_rate": 4.092071611253197e-06, "loss": 4.6046, "step": 40 }, { "epoch": 0.06, "learning_rate": 4.603580562659847e-06, "loss": 4.6372, "step": 45 }, { "epoch": 0.06, "learning_rate": 5.1150895140664966e-06, "loss": 4.5815, "step": 50 }, { "epoch": 0.07, "learning_rate": 5.626598465473146e-06, "loss": 4.5585, "step": 55 }, { "epoch": 0.08, "learning_rate": 6.138107416879796e-06, "loss": 4.6204, "step": 60 }, { "epoch": 0.08, "learning_rate": 6.649616368286445e-06, "loss": 4.5496, "step": 65 }, { "epoch": 0.09, "learning_rate": 7.161125319693095e-06, "loss": 4.5638, "step": 70 }, { "epoch": 0.1, "learning_rate": 7.672634271099745e-06, "loss": 4.5451, "step": 75 }, { "epoch": 0.1, "learning_rate": 8.184143222506395e-06, "loss": 4.5389, "step": 80 }, { "epoch": 0.11, "learning_rate": 8.695652173913044e-06, "loss": 4.4942, "step": 85 }, { "epoch": 0.12, "learning_rate": 9.207161125319694e-06, "loss": 4.4929, "step": 90 }, { "epoch": 0.12, "learning_rate": 9.718670076726344e-06, "loss": 4.4607, "step": 95 }, { "epoch": 0.13, "learning_rate": 1.0230179028132993e-05, "loss": 4.4665, "step": 100 }, { "epoch": 0.13, "learning_rate": 1.0741687979539643e-05, "loss": 4.438, "step": 105 }, { "epoch": 0.14, "learning_rate": 1.1253196930946292e-05, "loss": 4.415, "step": 110 }, { "epoch": 0.15, "learning_rate": 1.1764705882352942e-05, "loss": 4.3831, "step": 115 }, { "epoch": 0.15, "learning_rate": 1.2276214833759591e-05, "loss": 4.3877, "step": 120 }, { "epoch": 0.16, "learning_rate": 1.2787723785166241e-05, "loss": 4.3453, "step": 125 }, { "epoch": 0.17, "learning_rate": 1.329923273657289e-05, "loss": 4.3025, "step": 130 }, { "epoch": 0.17, "learning_rate": 1.381074168797954e-05, "loss": 4.2795, "step": 135 }, { "epoch": 0.18, "learning_rate": 1.432225063938619e-05, "loss": 4.2353, "step": 140 }, { "epoch": 0.19, "learning_rate": 1.483375959079284e-05, "loss": 4.1786, "step": 145 }, { "epoch": 0.19, "learning_rate": 1.534526854219949e-05, "loss": 4.1939, "step": 150 }, { "epoch": 0.2, "learning_rate": 1.585677749360614e-05, "loss": 4.0723, "step": 155 }, { "epoch": 0.2, "learning_rate": 1.636828644501279e-05, "loss": 4.0102, "step": 160 }, { "epoch": 0.21, "learning_rate": 1.687979539641944e-05, "loss": 3.9292, "step": 165 }, { "epoch": 0.22, "learning_rate": 1.739130434782609e-05, "loss": 3.8505, "step": 170 }, { "epoch": 0.22, "learning_rate": 1.790281329923274e-05, "loss": 3.7539, "step": 175 }, { "epoch": 0.23, "learning_rate": 1.8414322250639388e-05, "loss": 3.6833, "step": 180 }, { "epoch": 0.24, "learning_rate": 1.8925831202046038e-05, "loss": 3.5281, "step": 185 }, { "epoch": 0.24, "learning_rate": 1.9437340153452687e-05, "loss": 3.4329, "step": 190 }, { "epoch": 0.25, "learning_rate": 1.9948849104859337e-05, "loss": 3.3203, "step": 195 }, { "epoch": 0.26, "learning_rate": 2.0460358056265986e-05, "loss": 3.1954, "step": 200 }, { "epoch": 0.26, "learning_rate": 2.0971867007672636e-05, "loss": 3.0686, "step": 205 }, { "epoch": 0.27, "learning_rate": 2.1483375959079285e-05, "loss": 2.9424, "step": 210 }, { "epoch": 0.28, "learning_rate": 2.1994884910485935e-05, "loss": 3.0176, "step": 215 }, { "epoch": 0.28, "learning_rate": 2.2506393861892585e-05, "loss": 2.8912, "step": 220 }, { "epoch": 0.29, "learning_rate": 2.3017902813299234e-05, "loss": 2.7802, "step": 225 }, { "epoch": 0.29, "learning_rate": 2.3529411764705884e-05, "loss": 2.7013, "step": 230 }, { "epoch": 0.3, "learning_rate": 2.4040920716112533e-05, "loss": 2.6416, "step": 235 }, { "epoch": 0.31, "learning_rate": 2.4552429667519183e-05, "loss": 2.5482, "step": 240 }, { "epoch": 0.31, "learning_rate": 2.5063938618925832e-05, "loss": 2.3947, "step": 245 }, { "epoch": 0.32, "learning_rate": 2.5575447570332482e-05, "loss": 2.2859, "step": 250 }, { "epoch": 0.33, "learning_rate": 2.608695652173913e-05, "loss": 2.2609, "step": 255 }, { "epoch": 0.33, "learning_rate": 2.659846547314578e-05, "loss": 2.2065, "step": 260 }, { "epoch": 0.34, "learning_rate": 2.710997442455243e-05, "loss": 2.1006, "step": 265 }, { "epoch": 0.35, "learning_rate": 2.762148337595908e-05, "loss": 1.9752, "step": 270 }, { "epoch": 0.35, "learning_rate": 2.813299232736573e-05, "loss": 2.0412, "step": 275 }, { "epoch": 0.36, "learning_rate": 2.864450127877238e-05, "loss": 2.0338, "step": 280 }, { "epoch": 0.36, "learning_rate": 2.915601023017903e-05, "loss": 1.8349, "step": 285 }, { "epoch": 0.37, "learning_rate": 2.966751918158568e-05, "loss": 1.7146, "step": 290 }, { "epoch": 0.38, "learning_rate": 3.0179028132992328e-05, "loss": 1.7222, "step": 295 }, { "epoch": 0.38, "learning_rate": 3.069053708439898e-05, "loss": 1.8153, "step": 300 }, { "epoch": 0.39, "learning_rate": 3.120204603580563e-05, "loss": 1.6485, "step": 305 }, { "epoch": 0.4, "learning_rate": 3.171355498721228e-05, "loss": 1.485, "step": 310 }, { "epoch": 0.4, "learning_rate": 3.222506393861893e-05, "loss": 1.7435, "step": 315 }, { "epoch": 0.41, "learning_rate": 3.273657289002558e-05, "loss": 1.6707, "step": 320 }, { "epoch": 0.42, "learning_rate": 3.324808184143223e-05, "loss": 1.5172, "step": 325 }, { "epoch": 0.42, "learning_rate": 3.375959079283888e-05, "loss": 1.538, "step": 330 }, { "epoch": 0.43, "learning_rate": 3.427109974424553e-05, "loss": 1.424, "step": 335 }, { "epoch": 0.44, "learning_rate": 3.478260869565218e-05, "loss": 1.3758, "step": 340 }, { "epoch": 0.44, "learning_rate": 3.529411764705883e-05, "loss": 1.3251, "step": 345 }, { "epoch": 0.45, "learning_rate": 3.580562659846548e-05, "loss": 1.3147, "step": 350 }, { "epoch": 0.45, "learning_rate": 3.6317135549872126e-05, "loss": 1.3606, "step": 355 }, { "epoch": 0.46, "learning_rate": 3.6828644501278776e-05, "loss": 1.3198, "step": 360 }, { "epoch": 0.47, "learning_rate": 3.7340153452685426e-05, "loss": 1.4826, "step": 365 }, { "epoch": 0.47, "learning_rate": 3.7851662404092075e-05, "loss": 1.1348, "step": 370 }, { "epoch": 0.48, "learning_rate": 3.8363171355498725e-05, "loss": 1.2849, "step": 375 }, { "epoch": 0.49, "learning_rate": 3.8874680306905374e-05, "loss": 1.2261, "step": 380 }, { "epoch": 0.49, "learning_rate": 3.9386189258312024e-05, "loss": 1.175, "step": 385 }, { "epoch": 0.5, "learning_rate": 3.989769820971867e-05, "loss": 1.1654, "step": 390 }, { "epoch": 0.51, "learning_rate": 3.9954467842914065e-05, "loss": 1.2218, "step": 395 }, { "epoch": 0.51, "learning_rate": 3.989755264655663e-05, "loss": 1.0849, "step": 400 }, { "epoch": 0.52, "learning_rate": 3.9840637450199205e-05, "loss": 1.1411, "step": 405 }, { "epoch": 0.52, "learning_rate": 3.978372225384178e-05, "loss": 1.168, "step": 410 }, { "epoch": 0.53, "learning_rate": 3.972680705748435e-05, "loss": 1.2054, "step": 415 }, { "epoch": 0.54, "learning_rate": 3.9669891861126925e-05, "loss": 1.1557, "step": 420 }, { "epoch": 0.54, "learning_rate": 3.96129766647695e-05, "loss": 1.119, "step": 425 }, { "epoch": 0.55, "learning_rate": 3.955606146841207e-05, "loss": 1.1867, "step": 430 }, { "epoch": 0.56, "learning_rate": 3.949914627205464e-05, "loss": 0.9051, "step": 435 }, { "epoch": 0.56, "learning_rate": 3.944223107569721e-05, "loss": 1.0337, "step": 440 }, { "epoch": 0.57, "learning_rate": 3.9385315879339785e-05, "loss": 0.9839, "step": 445 }, { "epoch": 0.58, "learning_rate": 3.932840068298236e-05, "loss": 0.866, "step": 450 }, { "epoch": 0.58, "learning_rate": 3.927148548662493e-05, "loss": 0.9547, "step": 455 }, { "epoch": 0.59, "learning_rate": 3.9214570290267505e-05, "loss": 0.9838, "step": 460 }, { "epoch": 0.6, "learning_rate": 3.915765509391008e-05, "loss": 1.1078, "step": 465 }, { "epoch": 0.6, "learning_rate": 3.910073989755265e-05, "loss": 0.932, "step": 470 }, { "epoch": 0.61, "learning_rate": 3.9043824701195225e-05, "loss": 0.9879, "step": 475 }, { "epoch": 0.61, "learning_rate": 3.898690950483779e-05, "loss": 1.1054, "step": 480 }, { "epoch": 0.62, "learning_rate": 3.8929994308480365e-05, "loss": 0.9784, "step": 485 }, { "epoch": 0.63, "learning_rate": 3.887307911212294e-05, "loss": 1.0294, "step": 490 }, { "epoch": 0.63, "learning_rate": 3.881616391576551e-05, "loss": 0.946, "step": 495 }, { "epoch": 0.64, "learning_rate": 3.8759248719408085e-05, "loss": 0.9403, "step": 500 }, { "epoch": 0.65, "learning_rate": 3.870233352305066e-05, "loss": 0.8587, "step": 505 }, { "epoch": 0.65, "learning_rate": 3.864541832669323e-05, "loss": 0.881, "step": 510 }, { "epoch": 0.66, "learning_rate": 3.85885031303358e-05, "loss": 0.904, "step": 515 }, { "epoch": 0.67, "learning_rate": 3.853158793397838e-05, "loss": 0.9147, "step": 520 }, { "epoch": 0.67, "learning_rate": 3.8474672737620945e-05, "loss": 0.9299, "step": 525 }, { "epoch": 0.68, "learning_rate": 3.841775754126352e-05, "loss": 0.9064, "step": 530 }, { "epoch": 0.68, "learning_rate": 3.836084234490609e-05, "loss": 1.0158, "step": 535 }, { "epoch": 0.69, "learning_rate": 3.8303927148548666e-05, "loss": 0.8712, "step": 540 }, { "epoch": 0.7, "learning_rate": 3.824701195219124e-05, "loss": 0.8876, "step": 545 }, { "epoch": 0.7, "learning_rate": 3.819009675583381e-05, "loss": 0.9165, "step": 550 }, { "epoch": 0.71, "learning_rate": 3.8133181559476386e-05, "loss": 0.7727, "step": 555 }, { "epoch": 0.72, "learning_rate": 3.807626636311895e-05, "loss": 0.9544, "step": 560 }, { "epoch": 0.72, "learning_rate": 3.8019351166761526e-05, "loss": 0.8282, "step": 565 }, { "epoch": 0.73, "learning_rate": 3.79624359704041e-05, "loss": 0.8105, "step": 570 }, { "epoch": 0.74, "learning_rate": 3.790552077404667e-05, "loss": 0.8246, "step": 575 }, { "epoch": 0.74, "learning_rate": 3.7848605577689246e-05, "loss": 0.8071, "step": 580 }, { "epoch": 0.75, "learning_rate": 3.779169038133182e-05, "loss": 0.964, "step": 585 }, { "epoch": 0.76, "learning_rate": 3.773477518497439e-05, "loss": 0.8634, "step": 590 }, { "epoch": 0.76, "learning_rate": 3.7677859988616966e-05, "loss": 0.7772, "step": 595 }, { "epoch": 0.77, "learning_rate": 3.762094479225954e-05, "loss": 0.8086, "step": 600 }, { "epoch": 0.77, "learning_rate": 3.7564029595902106e-05, "loss": 0.8886, "step": 605 }, { "epoch": 0.78, "learning_rate": 3.750711439954468e-05, "loss": 0.8618, "step": 610 }, { "epoch": 0.79, "learning_rate": 3.745019920318725e-05, "loss": 0.8221, "step": 615 }, { "epoch": 0.79, "learning_rate": 3.7393284006829826e-05, "loss": 0.9336, "step": 620 }, { "epoch": 0.8, "learning_rate": 3.73363688104724e-05, "loss": 0.7384, "step": 625 }, { "epoch": 0.81, "learning_rate": 3.727945361411497e-05, "loss": 0.7313, "step": 630 }, { "epoch": 0.81, "learning_rate": 3.7222538417757546e-05, "loss": 0.9178, "step": 635 }, { "epoch": 0.82, "learning_rate": 3.716562322140011e-05, "loss": 0.8866, "step": 640 }, { "epoch": 0.83, "learning_rate": 3.710870802504269e-05, "loss": 0.6832, "step": 645 }, { "epoch": 0.83, "learning_rate": 3.705179282868526e-05, "loss": 0.776, "step": 650 }, { "epoch": 0.84, "learning_rate": 3.699487763232783e-05, "loss": 0.7298, "step": 655 }, { "epoch": 0.84, "learning_rate": 3.6937962435970406e-05, "loss": 0.7268, "step": 660 }, { "epoch": 0.85, "learning_rate": 3.688104723961298e-05, "loss": 0.8073, "step": 665 }, { "epoch": 0.86, "learning_rate": 3.682413204325555e-05, "loss": 0.7678, "step": 670 }, { "epoch": 0.86, "learning_rate": 3.6767216846898126e-05, "loss": 0.8216, "step": 675 }, { "epoch": 0.87, "learning_rate": 3.67103016505407e-05, "loss": 0.6896, "step": 680 }, { "epoch": 0.88, "learning_rate": 3.6653386454183266e-05, "loss": 0.8691, "step": 685 }, { "epoch": 0.88, "learning_rate": 3.659647125782584e-05, "loss": 0.8097, "step": 690 }, { "epoch": 0.89, "learning_rate": 3.653955606146841e-05, "loss": 0.7124, "step": 695 }, { "epoch": 0.9, "learning_rate": 3.6482640865110987e-05, "loss": 0.8661, "step": 700 }, { "epoch": 0.9, "learning_rate": 3.642572566875356e-05, "loss": 1.0455, "step": 705 }, { "epoch": 0.91, "learning_rate": 3.636881047239613e-05, "loss": 0.8263, "step": 710 }, { "epoch": 0.92, "learning_rate": 3.631189527603871e-05, "loss": 0.6256, "step": 715 }, { "epoch": 0.92, "learning_rate": 3.625498007968128e-05, "loss": 0.788, "step": 720 }, { "epoch": 0.93, "learning_rate": 3.6198064883323853e-05, "loss": 0.7374, "step": 725 }, { "epoch": 0.93, "learning_rate": 3.614114968696642e-05, "loss": 0.6936, "step": 730 }, { "epoch": 0.94, "learning_rate": 3.6084234490608993e-05, "loss": 0.7579, "step": 735 }, { "epoch": 0.95, "learning_rate": 3.602731929425157e-05, "loss": 0.7191, "step": 740 }, { "epoch": 0.95, "learning_rate": 3.597040409789414e-05, "loss": 0.7349, "step": 745 }, { "epoch": 0.96, "learning_rate": 3.5913488901536714e-05, "loss": 0.6269, "step": 750 }, { "epoch": 0.97, "learning_rate": 3.585657370517929e-05, "loss": 0.664, "step": 755 }, { "epoch": 0.97, "learning_rate": 3.579965850882186e-05, "loss": 0.6365, "step": 760 }, { "epoch": 0.98, "learning_rate": 3.574274331246443e-05, "loss": 0.756, "step": 765 }, { "epoch": 0.99, "learning_rate": 3.568582811610701e-05, "loss": 0.7975, "step": 770 }, { "epoch": 0.99, "learning_rate": 3.5628912919749574e-05, "loss": 0.8584, "step": 775 }, { "epoch": 1.0, "learning_rate": 3.557199772339215e-05, "loss": 0.6439, "step": 780 }, { "epoch": 1.0, "eval_accuracy": 0.8138, "eval_loss": 0.6126329302787781, "eval_runtime": 60.9802, "eval_samples_per_second": 163.988, "eval_steps_per_second": 10.249, "step": 781 }, { "epoch": 1.01, "learning_rate": 3.551508252703472e-05, "loss": 0.6383, "step": 785 }, { "epoch": 1.01, "learning_rate": 3.5458167330677294e-05, "loss": 0.6756, "step": 790 }, { "epoch": 1.02, "learning_rate": 3.540125213431987e-05, "loss": 0.5847, "step": 795 }, { "epoch": 1.02, "learning_rate": 3.534433693796244e-05, "loss": 0.5047, "step": 800 }, { "epoch": 1.03, "learning_rate": 3.5287421741605014e-05, "loss": 0.5946, "step": 805 }, { "epoch": 1.04, "learning_rate": 3.523050654524758e-05, "loss": 0.51, "step": 810 }, { "epoch": 1.04, "learning_rate": 3.5173591348890154e-05, "loss": 0.5915, "step": 815 }, { "epoch": 1.05, "learning_rate": 3.511667615253273e-05, "loss": 0.6674, "step": 820 }, { "epoch": 1.06, "learning_rate": 3.50597609561753e-05, "loss": 0.6427, "step": 825 }, { "epoch": 1.06, "learning_rate": 3.5002845759817874e-05, "loss": 0.6404, "step": 830 }, { "epoch": 1.07, "learning_rate": 3.494593056346045e-05, "loss": 0.5568, "step": 835 }, { "epoch": 1.08, "learning_rate": 3.488901536710302e-05, "loss": 0.7048, "step": 840 }, { "epoch": 1.08, "learning_rate": 3.4832100170745594e-05, "loss": 0.5817, "step": 845 }, { "epoch": 1.09, "learning_rate": 3.477518497438817e-05, "loss": 0.5192, "step": 850 }, { "epoch": 1.09, "learning_rate": 3.4718269778030734e-05, "loss": 0.7096, "step": 855 }, { "epoch": 1.1, "learning_rate": 3.466135458167331e-05, "loss": 0.561, "step": 860 }, { "epoch": 1.11, "learning_rate": 3.460443938531588e-05, "loss": 0.6275, "step": 865 }, { "epoch": 1.11, "learning_rate": 3.4547524188958454e-05, "loss": 0.5082, "step": 870 }, { "epoch": 1.12, "learning_rate": 3.449060899260103e-05, "loss": 0.616, "step": 875 }, { "epoch": 1.13, "learning_rate": 3.44336937962436e-05, "loss": 0.5976, "step": 880 }, { "epoch": 1.13, "learning_rate": 3.4376778599886174e-05, "loss": 0.6847, "step": 885 }, { "epoch": 1.14, "learning_rate": 3.431986340352874e-05, "loss": 0.4798, "step": 890 }, { "epoch": 1.15, "learning_rate": 3.426294820717132e-05, "loss": 0.6393, "step": 895 }, { "epoch": 1.15, "learning_rate": 3.420603301081389e-05, "loss": 0.4907, "step": 900 }, { "epoch": 1.16, "learning_rate": 3.414911781445646e-05, "loss": 0.4741, "step": 905 }, { "epoch": 1.17, "learning_rate": 3.4092202618099035e-05, "loss": 0.4989, "step": 910 }, { "epoch": 1.17, "learning_rate": 3.403528742174161e-05, "loss": 0.6102, "step": 915 }, { "epoch": 1.18, "learning_rate": 3.397837222538418e-05, "loss": 0.6051, "step": 920 }, { "epoch": 1.18, "learning_rate": 3.3921457029026755e-05, "loss": 0.5615, "step": 925 }, { "epoch": 1.19, "learning_rate": 3.386454183266933e-05, "loss": 0.5091, "step": 930 }, { "epoch": 1.2, "learning_rate": 3.3807626636311895e-05, "loss": 0.5863, "step": 935 }, { "epoch": 1.2, "learning_rate": 3.375071143995447e-05, "loss": 0.6056, "step": 940 }, { "epoch": 1.21, "learning_rate": 3.369379624359704e-05, "loss": 0.4893, "step": 945 }, { "epoch": 1.22, "learning_rate": 3.3636881047239615e-05, "loss": 0.607, "step": 950 }, { "epoch": 1.22, "learning_rate": 3.357996585088219e-05, "loss": 0.5942, "step": 955 }, { "epoch": 1.23, "learning_rate": 3.352305065452476e-05, "loss": 0.5453, "step": 960 }, { "epoch": 1.24, "learning_rate": 3.3466135458167335e-05, "loss": 0.5637, "step": 965 }, { "epoch": 1.24, "learning_rate": 3.34092202618099e-05, "loss": 0.5974, "step": 970 }, { "epoch": 1.25, "learning_rate": 3.335230506545248e-05, "loss": 0.5365, "step": 975 }, { "epoch": 1.25, "learning_rate": 3.329538986909505e-05, "loss": 0.5487, "step": 980 }, { "epoch": 1.26, "learning_rate": 3.323847467273762e-05, "loss": 0.5981, "step": 985 }, { "epoch": 1.27, "learning_rate": 3.3181559476380195e-05, "loss": 0.4977, "step": 990 }, { "epoch": 1.27, "learning_rate": 3.312464428002277e-05, "loss": 0.4873, "step": 995 }, { "epoch": 1.28, "learning_rate": 3.306772908366534e-05, "loss": 0.6305, "step": 1000 }, { "epoch": 1.29, "learning_rate": 3.3010813887307915e-05, "loss": 0.4625, "step": 1005 }, { "epoch": 1.29, "learning_rate": 3.295389869095049e-05, "loss": 0.7791, "step": 1010 }, { "epoch": 1.3, "learning_rate": 3.2896983494593055e-05, "loss": 0.5784, "step": 1015 }, { "epoch": 1.31, "learning_rate": 3.2840068298235635e-05, "loss": 0.4482, "step": 1020 }, { "epoch": 1.31, "learning_rate": 3.27831531018782e-05, "loss": 0.5718, "step": 1025 }, { "epoch": 1.32, "learning_rate": 3.2726237905520775e-05, "loss": 0.5399, "step": 1030 }, { "epoch": 1.33, "learning_rate": 3.266932270916335e-05, "loss": 0.5408, "step": 1035 }, { "epoch": 1.33, "learning_rate": 3.261240751280592e-05, "loss": 0.5713, "step": 1040 }, { "epoch": 1.34, "learning_rate": 3.2555492316448495e-05, "loss": 0.3968, "step": 1045 }, { "epoch": 1.34, "learning_rate": 3.249857712009107e-05, "loss": 0.5708, "step": 1050 }, { "epoch": 1.35, "learning_rate": 3.244166192373364e-05, "loss": 0.6139, "step": 1055 }, { "epoch": 1.36, "learning_rate": 3.238474672737621e-05, "loss": 0.6031, "step": 1060 }, { "epoch": 1.36, "learning_rate": 3.232783153101878e-05, "loss": 0.4819, "step": 1065 }, { "epoch": 1.37, "learning_rate": 3.2270916334661356e-05, "loss": 0.5141, "step": 1070 }, { "epoch": 1.38, "learning_rate": 3.221400113830393e-05, "loss": 0.4998, "step": 1075 }, { "epoch": 1.38, "learning_rate": 3.21570859419465e-05, "loss": 0.4646, "step": 1080 }, { "epoch": 1.39, "learning_rate": 3.2100170745589076e-05, "loss": 0.4859, "step": 1085 }, { "epoch": 1.4, "learning_rate": 3.204325554923165e-05, "loss": 0.5069, "step": 1090 }, { "epoch": 1.4, "learning_rate": 3.1986340352874216e-05, "loss": 0.5751, "step": 1095 }, { "epoch": 1.41, "learning_rate": 3.1929425156516796e-05, "loss": 0.4505, "step": 1100 }, { "epoch": 1.41, "learning_rate": 3.187250996015936e-05, "loss": 0.5396, "step": 1105 }, { "epoch": 1.42, "learning_rate": 3.1815594763801936e-05, "loss": 0.5394, "step": 1110 }, { "epoch": 1.43, "learning_rate": 3.175867956744451e-05, "loss": 0.6824, "step": 1115 }, { "epoch": 1.43, "learning_rate": 3.170176437108708e-05, "loss": 0.414, "step": 1120 }, { "epoch": 1.44, "learning_rate": 3.1644849174729656e-05, "loss": 0.5944, "step": 1125 }, { "epoch": 1.45, "learning_rate": 3.158793397837223e-05, "loss": 0.5384, "step": 1130 }, { "epoch": 1.45, "learning_rate": 3.15310187820148e-05, "loss": 0.7521, "step": 1135 }, { "epoch": 1.46, "learning_rate": 3.147410358565737e-05, "loss": 0.6244, "step": 1140 }, { "epoch": 1.47, "learning_rate": 3.141718838929995e-05, "loss": 0.4822, "step": 1145 }, { "epoch": 1.47, "learning_rate": 3.1360273192942516e-05, "loss": 0.5942, "step": 1150 }, { "epoch": 1.48, "learning_rate": 3.130335799658509e-05, "loss": 0.5526, "step": 1155 }, { "epoch": 1.49, "learning_rate": 3.124644280022766e-05, "loss": 0.5807, "step": 1160 }, { "epoch": 1.49, "learning_rate": 3.1189527603870236e-05, "loss": 0.6191, "step": 1165 }, { "epoch": 1.5, "learning_rate": 3.113261240751281e-05, "loss": 0.4252, "step": 1170 }, { "epoch": 1.5, "learning_rate": 3.107569721115538e-05, "loss": 0.6039, "step": 1175 }, { "epoch": 1.51, "learning_rate": 3.1018782014797956e-05, "loss": 0.5023, "step": 1180 }, { "epoch": 1.52, "learning_rate": 3.096186681844052e-05, "loss": 0.4397, "step": 1185 }, { "epoch": 1.52, "learning_rate": 3.0904951622083096e-05, "loss": 0.5488, "step": 1190 }, { "epoch": 1.53, "learning_rate": 3.084803642572567e-05, "loss": 0.4943, "step": 1195 }, { "epoch": 1.54, "learning_rate": 3.079112122936824e-05, "loss": 0.4196, "step": 1200 }, { "epoch": 1.54, "learning_rate": 3.0734206033010816e-05, "loss": 0.5103, "step": 1205 }, { "epoch": 1.55, "learning_rate": 3.067729083665339e-05, "loss": 0.5383, "step": 1210 }, { "epoch": 1.56, "learning_rate": 3.062037564029596e-05, "loss": 0.5533, "step": 1215 }, { "epoch": 1.56, "learning_rate": 3.056346044393853e-05, "loss": 0.6003, "step": 1220 }, { "epoch": 1.57, "learning_rate": 3.0506545247581107e-05, "loss": 0.3887, "step": 1225 }, { "epoch": 1.57, "learning_rate": 3.0449630051223676e-05, "loss": 0.4925, "step": 1230 }, { "epoch": 1.58, "learning_rate": 3.0392714854866253e-05, "loss": 0.5327, "step": 1235 }, { "epoch": 1.59, "learning_rate": 3.0335799658508823e-05, "loss": 0.4195, "step": 1240 }, { "epoch": 1.59, "learning_rate": 3.0278884462151397e-05, "loss": 0.4912, "step": 1245 }, { "epoch": 1.6, "learning_rate": 3.022196926579397e-05, "loss": 0.6002, "step": 1250 }, { "epoch": 1.61, "learning_rate": 3.016505406943654e-05, "loss": 0.5191, "step": 1255 }, { "epoch": 1.61, "learning_rate": 3.0108138873079117e-05, "loss": 0.4732, "step": 1260 }, { "epoch": 1.62, "learning_rate": 3.0051223676721687e-05, "loss": 0.4728, "step": 1265 }, { "epoch": 1.63, "learning_rate": 2.999430848036426e-05, "loss": 0.658, "step": 1270 }, { "epoch": 1.63, "learning_rate": 2.993739328400683e-05, "loss": 0.3973, "step": 1275 }, { "epoch": 1.64, "learning_rate": 2.9880478087649403e-05, "loss": 0.518, "step": 1280 }, { "epoch": 1.65, "learning_rate": 2.982356289129198e-05, "loss": 0.513, "step": 1285 }, { "epoch": 1.65, "learning_rate": 2.976664769493455e-05, "loss": 0.4699, "step": 1290 }, { "epoch": 1.66, "learning_rate": 2.9709732498577124e-05, "loss": 0.5086, "step": 1295 }, { "epoch": 1.66, "learning_rate": 2.9652817302219694e-05, "loss": 0.4464, "step": 1300 }, { "epoch": 1.67, "learning_rate": 2.9595902105862267e-05, "loss": 0.4587, "step": 1305 }, { "epoch": 1.68, "learning_rate": 2.953898690950484e-05, "loss": 0.5568, "step": 1310 }, { "epoch": 1.68, "learning_rate": 2.9482071713147414e-05, "loss": 0.4991, "step": 1315 }, { "epoch": 1.69, "learning_rate": 2.9425156516789984e-05, "loss": 0.4953, "step": 1320 }, { "epoch": 1.7, "learning_rate": 2.9368241320432557e-05, "loss": 0.5821, "step": 1325 }, { "epoch": 1.7, "learning_rate": 2.9311326124075134e-05, "loss": 0.4582, "step": 1330 }, { "epoch": 1.71, "learning_rate": 2.9254410927717704e-05, "loss": 0.4931, "step": 1335 }, { "epoch": 1.72, "learning_rate": 2.9197495731360277e-05, "loss": 0.4979, "step": 1340 }, { "epoch": 1.72, "learning_rate": 2.9140580535002847e-05, "loss": 0.4933, "step": 1345 }, { "epoch": 1.73, "learning_rate": 2.908366533864542e-05, "loss": 0.463, "step": 1350 }, { "epoch": 1.73, "learning_rate": 2.902675014228799e-05, "loss": 0.4945, "step": 1355 }, { "epoch": 1.74, "learning_rate": 2.8969834945930567e-05, "loss": 0.4822, "step": 1360 }, { "epoch": 1.75, "learning_rate": 2.8912919749573137e-05, "loss": 0.5452, "step": 1365 }, { "epoch": 1.75, "learning_rate": 2.885600455321571e-05, "loss": 0.4868, "step": 1370 }, { "epoch": 1.76, "learning_rate": 2.8799089356858284e-05, "loss": 0.553, "step": 1375 }, { "epoch": 1.77, "learning_rate": 2.8742174160500854e-05, "loss": 0.5744, "step": 1380 }, { "epoch": 1.77, "learning_rate": 2.868525896414343e-05, "loss": 0.5091, "step": 1385 }, { "epoch": 1.78, "learning_rate": 2.8628343767786e-05, "loss": 0.5209, "step": 1390 }, { "epoch": 1.79, "learning_rate": 2.8571428571428574e-05, "loss": 0.5506, "step": 1395 }, { "epoch": 1.79, "learning_rate": 2.8514513375071144e-05, "loss": 0.5383, "step": 1400 }, { "epoch": 1.8, "learning_rate": 2.8457598178713718e-05, "loss": 0.5534, "step": 1405 }, { "epoch": 1.81, "learning_rate": 2.8400682982356294e-05, "loss": 0.3911, "step": 1410 }, { "epoch": 1.81, "learning_rate": 2.8343767785998864e-05, "loss": 0.501, "step": 1415 }, { "epoch": 1.82, "learning_rate": 2.8286852589641438e-05, "loss": 0.4988, "step": 1420 }, { "epoch": 1.82, "learning_rate": 2.8229937393284008e-05, "loss": 0.5158, "step": 1425 }, { "epoch": 1.83, "learning_rate": 2.817302219692658e-05, "loss": 0.4976, "step": 1430 }, { "epoch": 1.84, "learning_rate": 2.811610700056915e-05, "loss": 0.4873, "step": 1435 }, { "epoch": 1.84, "learning_rate": 2.8059191804211728e-05, "loss": 0.5198, "step": 1440 }, { "epoch": 1.85, "learning_rate": 2.8002276607854298e-05, "loss": 0.4795, "step": 1445 }, { "epoch": 1.86, "learning_rate": 2.794536141149687e-05, "loss": 0.5029, "step": 1450 }, { "epoch": 1.86, "learning_rate": 2.7888446215139448e-05, "loss": 0.4574, "step": 1455 }, { "epoch": 1.87, "learning_rate": 2.7831531018782018e-05, "loss": 0.4224, "step": 1460 }, { "epoch": 1.88, "learning_rate": 2.777461582242459e-05, "loss": 0.4447, "step": 1465 }, { "epoch": 1.88, "learning_rate": 2.771770062606716e-05, "loss": 0.5863, "step": 1470 }, { "epoch": 1.89, "learning_rate": 2.7660785429709735e-05, "loss": 0.5724, "step": 1475 }, { "epoch": 1.89, "learning_rate": 2.7603870233352305e-05, "loss": 0.4397, "step": 1480 }, { "epoch": 1.9, "learning_rate": 2.754695503699488e-05, "loss": 0.441, "step": 1485 }, { "epoch": 1.91, "learning_rate": 2.749003984063745e-05, "loss": 0.549, "step": 1490 }, { "epoch": 1.91, "learning_rate": 2.7433124644280025e-05, "loss": 0.4723, "step": 1495 }, { "epoch": 1.92, "learning_rate": 2.7376209447922598e-05, "loss": 0.4554, "step": 1500 }, { "epoch": 1.93, "learning_rate": 2.7319294251565168e-05, "loss": 0.5067, "step": 1505 }, { "epoch": 1.93, "learning_rate": 2.7262379055207745e-05, "loss": 0.3471, "step": 1510 }, { "epoch": 1.94, "learning_rate": 2.7205463858850315e-05, "loss": 0.4403, "step": 1515 }, { "epoch": 1.95, "learning_rate": 2.714854866249289e-05, "loss": 0.4034, "step": 1520 }, { "epoch": 1.95, "learning_rate": 2.709163346613546e-05, "loss": 0.617, "step": 1525 }, { "epoch": 1.96, "learning_rate": 2.7034718269778032e-05, "loss": 0.489, "step": 1530 }, { "epoch": 1.97, "learning_rate": 2.697780307342061e-05, "loss": 0.4514, "step": 1535 }, { "epoch": 1.97, "learning_rate": 2.692088787706318e-05, "loss": 0.4604, "step": 1540 }, { "epoch": 1.98, "learning_rate": 2.6863972680705752e-05, "loss": 0.4845, "step": 1545 }, { "epoch": 1.98, "learning_rate": 2.6807057484348322e-05, "loss": 0.4273, "step": 1550 }, { "epoch": 1.99, "learning_rate": 2.6750142287990895e-05, "loss": 0.3995, "step": 1555 }, { "epoch": 2.0, "learning_rate": 2.6693227091633465e-05, "loss": 0.6222, "step": 1560 }, { "epoch": 2.0, "eval_accuracy": 0.8393, "eval_loss": 0.5094287395477295, "eval_runtime": 60.7156, "eval_samples_per_second": 164.702, "eval_steps_per_second": 10.294, "step": 1562 }, { "epoch": 2.0, "learning_rate": 2.6636311895276042e-05, "loss": 0.3977, "step": 1565 }, { "epoch": 2.01, "learning_rate": 2.6579396698918612e-05, "loss": 0.2847, "step": 1570 }, { "epoch": 2.02, "learning_rate": 2.6522481502561185e-05, "loss": 0.384, "step": 1575 }, { "epoch": 2.02, "learning_rate": 2.6465566306203762e-05, "loss": 0.3344, "step": 1580 }, { "epoch": 2.03, "learning_rate": 2.6408651109846332e-05, "loss": 0.347, "step": 1585 }, { "epoch": 2.04, "learning_rate": 2.6351735913488905e-05, "loss": 0.3207, "step": 1590 }, { "epoch": 2.04, "learning_rate": 2.6294820717131475e-05, "loss": 0.3625, "step": 1595 }, { "epoch": 2.05, "learning_rate": 2.623790552077405e-05, "loss": 0.2822, "step": 1600 }, { "epoch": 2.06, "learning_rate": 2.618099032441662e-05, "loss": 0.3479, "step": 1605 }, { "epoch": 2.06, "learning_rate": 2.6124075128059196e-05, "loss": 0.318, "step": 1610 }, { "epoch": 2.07, "learning_rate": 2.6067159931701766e-05, "loss": 0.3668, "step": 1615 }, { "epoch": 2.07, "learning_rate": 2.601024473534434e-05, "loss": 0.3594, "step": 1620 }, { "epoch": 2.08, "learning_rate": 2.5953329538986912e-05, "loss": 0.3636, "step": 1625 }, { "epoch": 2.09, "learning_rate": 2.5896414342629482e-05, "loss": 0.3588, "step": 1630 }, { "epoch": 2.09, "learning_rate": 2.583949914627206e-05, "loss": 0.3155, "step": 1635 }, { "epoch": 2.1, "learning_rate": 2.578258394991463e-05, "loss": 0.3362, "step": 1640 }, { "epoch": 2.11, "learning_rate": 2.5725668753557202e-05, "loss": 0.3159, "step": 1645 }, { "epoch": 2.11, "learning_rate": 2.5668753557199772e-05, "loss": 0.3167, "step": 1650 }, { "epoch": 2.12, "learning_rate": 2.5611838360842346e-05, "loss": 0.3597, "step": 1655 }, { "epoch": 2.13, "learning_rate": 2.5554923164484923e-05, "loss": 0.2862, "step": 1660 }, { "epoch": 2.13, "learning_rate": 2.5498007968127493e-05, "loss": 0.4218, "step": 1665 }, { "epoch": 2.14, "learning_rate": 2.5441092771770066e-05, "loss": 0.3902, "step": 1670 }, { "epoch": 2.14, "learning_rate": 2.5384177575412636e-05, "loss": 0.371, "step": 1675 }, { "epoch": 2.15, "learning_rate": 2.532726237905521e-05, "loss": 0.3218, "step": 1680 }, { "epoch": 2.16, "learning_rate": 2.527034718269778e-05, "loss": 0.3233, "step": 1685 }, { "epoch": 2.16, "learning_rate": 2.5213431986340356e-05, "loss": 0.3293, "step": 1690 }, { "epoch": 2.17, "learning_rate": 2.5156516789982926e-05, "loss": 0.295, "step": 1695 }, { "epoch": 2.18, "learning_rate": 2.50996015936255e-05, "loss": 0.3192, "step": 1700 }, { "epoch": 2.18, "learning_rate": 2.5042686397268073e-05, "loss": 0.2638, "step": 1705 }, { "epoch": 2.19, "learning_rate": 2.4985771200910646e-05, "loss": 0.3065, "step": 1710 }, { "epoch": 2.2, "learning_rate": 2.492885600455322e-05, "loss": 0.3483, "step": 1715 }, { "epoch": 2.2, "learning_rate": 2.487194080819579e-05, "loss": 0.3138, "step": 1720 }, { "epoch": 2.21, "learning_rate": 2.4815025611838363e-05, "loss": 0.3677, "step": 1725 }, { "epoch": 2.22, "learning_rate": 2.4758110415480933e-05, "loss": 0.3726, "step": 1730 }, { "epoch": 2.22, "learning_rate": 2.470119521912351e-05, "loss": 0.3356, "step": 1735 }, { "epoch": 2.23, "learning_rate": 2.4644280022766083e-05, "loss": 0.3099, "step": 1740 }, { "epoch": 2.23, "learning_rate": 2.4587364826408653e-05, "loss": 0.283, "step": 1745 }, { "epoch": 2.24, "learning_rate": 2.4530449630051226e-05, "loss": 0.2828, "step": 1750 }, { "epoch": 2.25, "learning_rate": 2.4473534433693796e-05, "loss": 0.3751, "step": 1755 }, { "epoch": 2.25, "learning_rate": 2.4416619237336373e-05, "loss": 0.3227, "step": 1760 }, { "epoch": 2.26, "learning_rate": 2.4359704040978943e-05, "loss": 0.3716, "step": 1765 }, { "epoch": 2.27, "learning_rate": 2.4302788844621517e-05, "loss": 0.3669, "step": 1770 }, { "epoch": 2.27, "learning_rate": 2.4245873648264087e-05, "loss": 0.3195, "step": 1775 }, { "epoch": 2.28, "learning_rate": 2.418895845190666e-05, "loss": 0.3147, "step": 1780 }, { "epoch": 2.29, "learning_rate": 2.4132043255549237e-05, "loss": 0.339, "step": 1785 }, { "epoch": 2.29, "learning_rate": 2.4075128059191807e-05, "loss": 0.3949, "step": 1790 }, { "epoch": 2.3, "learning_rate": 2.401821286283438e-05, "loss": 0.2976, "step": 1795 }, { "epoch": 2.3, "learning_rate": 2.396129766647695e-05, "loss": 0.4075, "step": 1800 }, { "epoch": 2.31, "learning_rate": 2.3904382470119523e-05, "loss": 0.3482, "step": 1805 }, { "epoch": 2.32, "learning_rate": 2.3847467273762093e-05, "loss": 0.4089, "step": 1810 }, { "epoch": 2.32, "learning_rate": 2.379055207740467e-05, "loss": 0.3574, "step": 1815 }, { "epoch": 2.33, "learning_rate": 2.373363688104724e-05, "loss": 0.3617, "step": 1820 }, { "epoch": 2.34, "learning_rate": 2.3676721684689814e-05, "loss": 0.3421, "step": 1825 }, { "epoch": 2.34, "learning_rate": 2.3619806488332387e-05, "loss": 0.3523, "step": 1830 }, { "epoch": 2.35, "learning_rate": 2.3562891291974957e-05, "loss": 0.3594, "step": 1835 }, { "epoch": 2.36, "learning_rate": 2.3505976095617534e-05, "loss": 0.3177, "step": 1840 }, { "epoch": 2.36, "learning_rate": 2.3449060899260104e-05, "loss": 0.3867, "step": 1845 }, { "epoch": 2.37, "learning_rate": 2.3392145702902677e-05, "loss": 0.3826, "step": 1850 }, { "epoch": 2.38, "learning_rate": 2.3335230506545247e-05, "loss": 0.2243, "step": 1855 }, { "epoch": 2.38, "learning_rate": 2.3278315310187824e-05, "loss": 0.3039, "step": 1860 }, { "epoch": 2.39, "learning_rate": 2.3221400113830397e-05, "loss": 0.3555, "step": 1865 }, { "epoch": 2.39, "learning_rate": 2.3164484917472967e-05, "loss": 0.3321, "step": 1870 }, { "epoch": 2.4, "learning_rate": 2.310756972111554e-05, "loss": 0.3334, "step": 1875 }, { "epoch": 2.41, "learning_rate": 2.305065452475811e-05, "loss": 0.3629, "step": 1880 }, { "epoch": 2.41, "learning_rate": 2.2993739328400687e-05, "loss": 0.2421, "step": 1885 }, { "epoch": 2.42, "learning_rate": 2.2936824132043257e-05, "loss": 0.3204, "step": 1890 }, { "epoch": 2.43, "learning_rate": 2.287990893568583e-05, "loss": 0.3631, "step": 1895 }, { "epoch": 2.43, "learning_rate": 2.28229937393284e-05, "loss": 0.3279, "step": 1900 }, { "epoch": 2.44, "learning_rate": 2.2766078542970974e-05, "loss": 0.3008, "step": 1905 }, { "epoch": 2.45, "learning_rate": 2.270916334661355e-05, "loss": 0.4036, "step": 1910 }, { "epoch": 2.45, "learning_rate": 2.265224815025612e-05, "loss": 0.3201, "step": 1915 }, { "epoch": 2.46, "learning_rate": 2.2595332953898694e-05, "loss": 0.3041, "step": 1920 }, { "epoch": 2.46, "learning_rate": 2.2538417757541264e-05, "loss": 0.3208, "step": 1925 }, { "epoch": 2.47, "learning_rate": 2.2481502561183838e-05, "loss": 0.2943, "step": 1930 }, { "epoch": 2.48, "learning_rate": 2.2424587364826408e-05, "loss": 0.2831, "step": 1935 }, { "epoch": 2.48, "learning_rate": 2.2367672168468984e-05, "loss": 0.3645, "step": 1940 }, { "epoch": 2.49, "learning_rate": 2.2310756972111554e-05, "loss": 0.3532, "step": 1945 }, { "epoch": 2.5, "learning_rate": 2.2253841775754128e-05, "loss": 0.3504, "step": 1950 }, { "epoch": 2.5, "learning_rate": 2.21969265793967e-05, "loss": 0.3465, "step": 1955 }, { "epoch": 2.51, "learning_rate": 2.214001138303927e-05, "loss": 0.358, "step": 1960 }, { "epoch": 2.52, "learning_rate": 2.2083096186681848e-05, "loss": 0.3855, "step": 1965 }, { "epoch": 2.52, "learning_rate": 2.2026180990324418e-05, "loss": 0.2887, "step": 1970 }, { "epoch": 2.53, "learning_rate": 2.196926579396699e-05, "loss": 0.275, "step": 1975 }, { "epoch": 2.54, "learning_rate": 2.191235059760956e-05, "loss": 0.2384, "step": 1980 }, { "epoch": 2.54, "learning_rate": 2.1855435401252138e-05, "loss": 0.2829, "step": 1985 }, { "epoch": 2.55, "learning_rate": 2.179852020489471e-05, "loss": 0.3765, "step": 1990 }, { "epoch": 2.55, "learning_rate": 2.174160500853728e-05, "loss": 0.3509, "step": 1995 }, { "epoch": 2.56, "learning_rate": 2.1684689812179855e-05, "loss": 0.3517, "step": 2000 }, { "epoch": 2.57, "learning_rate": 2.1627774615822425e-05, "loss": 0.3016, "step": 2005 }, { "epoch": 2.57, "learning_rate": 2.1570859419465e-05, "loss": 0.3421, "step": 2010 }, { "epoch": 2.58, "learning_rate": 2.151394422310757e-05, "loss": 0.3054, "step": 2015 }, { "epoch": 2.59, "learning_rate": 2.1457029026750145e-05, "loss": 0.3658, "step": 2020 }, { "epoch": 2.59, "learning_rate": 2.1400113830392715e-05, "loss": 0.2979, "step": 2025 }, { "epoch": 2.6, "learning_rate": 2.1343198634035288e-05, "loss": 0.413, "step": 2030 }, { "epoch": 2.61, "learning_rate": 2.1286283437677865e-05, "loss": 0.3388, "step": 2035 }, { "epoch": 2.61, "learning_rate": 2.1229368241320435e-05, "loss": 0.2758, "step": 2040 }, { "epoch": 2.62, "learning_rate": 2.117245304496301e-05, "loss": 0.2786, "step": 2045 }, { "epoch": 2.62, "learning_rate": 2.111553784860558e-05, "loss": 0.2577, "step": 2050 }, { "epoch": 2.63, "learning_rate": 2.1058622652248152e-05, "loss": 0.26, "step": 2055 }, { "epoch": 2.64, "learning_rate": 2.1001707455890722e-05, "loss": 0.2994, "step": 2060 }, { "epoch": 2.64, "learning_rate": 2.09447922595333e-05, "loss": 0.2211, "step": 2065 }, { "epoch": 2.65, "learning_rate": 2.088787706317587e-05, "loss": 0.3152, "step": 2070 }, { "epoch": 2.66, "learning_rate": 2.0830961866818442e-05, "loss": 0.253, "step": 2075 }, { "epoch": 2.66, "learning_rate": 2.0774046670461015e-05, "loss": 0.3429, "step": 2080 }, { "epoch": 2.67, "learning_rate": 2.0717131474103585e-05, "loss": 0.2717, "step": 2085 }, { "epoch": 2.68, "learning_rate": 2.0660216277746162e-05, "loss": 0.2923, "step": 2090 }, { "epoch": 2.68, "learning_rate": 2.0603301081388732e-05, "loss": 0.2446, "step": 2095 }, { "epoch": 2.69, "learning_rate": 2.0546385885031305e-05, "loss": 0.2661, "step": 2100 }, { "epoch": 2.7, "learning_rate": 2.0489470688673875e-05, "loss": 0.3075, "step": 2105 }, { "epoch": 2.7, "learning_rate": 2.0432555492316452e-05, "loss": 0.3915, "step": 2110 }, { "epoch": 2.71, "learning_rate": 2.0375640295959025e-05, "loss": 0.385, "step": 2115 }, { "epoch": 2.71, "learning_rate": 2.0318725099601595e-05, "loss": 0.3714, "step": 2120 }, { "epoch": 2.72, "learning_rate": 2.026180990324417e-05, "loss": 0.3581, "step": 2125 }, { "epoch": 2.73, "learning_rate": 2.020489470688674e-05, "loss": 0.2439, "step": 2130 }, { "epoch": 2.73, "learning_rate": 2.0147979510529316e-05, "loss": 0.3, "step": 2135 }, { "epoch": 2.74, "learning_rate": 2.0091064314171886e-05, "loss": 0.2996, "step": 2140 }, { "epoch": 2.75, "learning_rate": 2.003414911781446e-05, "loss": 0.305, "step": 2145 }, { "epoch": 2.75, "learning_rate": 1.9977233921457032e-05, "loss": 0.3291, "step": 2150 }, { "epoch": 2.76, "learning_rate": 1.9920318725099602e-05, "loss": 0.2964, "step": 2155 }, { "epoch": 2.77, "learning_rate": 1.9863403528742176e-05, "loss": 0.4112, "step": 2160 }, { "epoch": 2.77, "learning_rate": 1.980648833238475e-05, "loss": 0.3476, "step": 2165 }, { "epoch": 2.78, "learning_rate": 1.974957313602732e-05, "loss": 0.314, "step": 2170 }, { "epoch": 2.78, "learning_rate": 1.9692657939669892e-05, "loss": 0.2829, "step": 2175 }, { "epoch": 2.79, "learning_rate": 1.9635742743312466e-05, "loss": 0.3628, "step": 2180 }, { "epoch": 2.8, "learning_rate": 1.957882754695504e-05, "loss": 0.2601, "step": 2185 }, { "epoch": 2.8, "learning_rate": 1.9521912350597613e-05, "loss": 0.401, "step": 2190 }, { "epoch": 2.81, "learning_rate": 1.9464997154240183e-05, "loss": 0.261, "step": 2195 }, { "epoch": 2.82, "learning_rate": 1.9408081957882756e-05, "loss": 0.3531, "step": 2200 }, { "epoch": 2.82, "learning_rate": 1.935116676152533e-05, "loss": 0.3118, "step": 2205 }, { "epoch": 2.83, "learning_rate": 1.92942515651679e-05, "loss": 0.3498, "step": 2210 }, { "epoch": 2.84, "learning_rate": 1.9237336368810473e-05, "loss": 0.3738, "step": 2215 }, { "epoch": 2.84, "learning_rate": 1.9180421172453046e-05, "loss": 0.2844, "step": 2220 }, { "epoch": 2.85, "learning_rate": 1.912350597609562e-05, "loss": 0.3668, "step": 2225 }, { "epoch": 2.86, "learning_rate": 1.9066590779738193e-05, "loss": 0.4105, "step": 2230 }, { "epoch": 2.86, "learning_rate": 1.9009675583380763e-05, "loss": 0.3562, "step": 2235 }, { "epoch": 2.87, "learning_rate": 1.8952760387023336e-05, "loss": 0.3053, "step": 2240 }, { "epoch": 2.87, "learning_rate": 1.889584519066591e-05, "loss": 0.3124, "step": 2245 }, { "epoch": 2.88, "learning_rate": 1.8838929994308483e-05, "loss": 0.3148, "step": 2250 }, { "epoch": 2.89, "learning_rate": 1.8782014797951053e-05, "loss": 0.2883, "step": 2255 }, { "epoch": 2.89, "learning_rate": 1.8725099601593626e-05, "loss": 0.3433, "step": 2260 }, { "epoch": 2.9, "learning_rate": 1.86681844052362e-05, "loss": 0.343, "step": 2265 }, { "epoch": 2.91, "learning_rate": 1.8611269208878773e-05, "loss": 0.2873, "step": 2270 }, { "epoch": 2.91, "learning_rate": 1.8554354012521346e-05, "loss": 0.3344, "step": 2275 }, { "epoch": 2.92, "learning_rate": 1.8497438816163916e-05, "loss": 0.2587, "step": 2280 }, { "epoch": 2.93, "learning_rate": 1.844052361980649e-05, "loss": 0.3247, "step": 2285 }, { "epoch": 2.93, "learning_rate": 1.8383608423449063e-05, "loss": 0.281, "step": 2290 }, { "epoch": 2.94, "learning_rate": 1.8326693227091633e-05, "loss": 0.2981, "step": 2295 }, { "epoch": 2.94, "learning_rate": 1.8269778030734207e-05, "loss": 0.228, "step": 2300 }, { "epoch": 2.95, "learning_rate": 1.821286283437678e-05, "loss": 0.3926, "step": 2305 }, { "epoch": 2.96, "learning_rate": 1.8155947638019353e-05, "loss": 0.2932, "step": 2310 }, { "epoch": 2.96, "learning_rate": 1.8099032441661927e-05, "loss": 0.364, "step": 2315 }, { "epoch": 2.97, "learning_rate": 1.8042117245304497e-05, "loss": 0.4113, "step": 2320 }, { "epoch": 2.98, "learning_rate": 1.798520204894707e-05, "loss": 0.3103, "step": 2325 }, { "epoch": 2.98, "learning_rate": 1.7928286852589643e-05, "loss": 0.2307, "step": 2330 }, { "epoch": 2.99, "learning_rate": 1.7871371656232213e-05, "loss": 0.2478, "step": 2335 }, { "epoch": 3.0, "learning_rate": 1.7814456459874787e-05, "loss": 0.2912, "step": 2340 }, { "epoch": 3.0, "eval_accuracy": 0.861, "eval_loss": 0.4452311098575592, "eval_runtime": 61.2046, "eval_samples_per_second": 163.386, "eval_steps_per_second": 10.212, "step": 2343 }, { "epoch": 3.0, "learning_rate": 1.775754126351736e-05, "loss": 0.2109, "step": 2345 }, { "epoch": 3.01, "learning_rate": 1.7700626067159934e-05, "loss": 0.2094, "step": 2350 }, { "epoch": 3.02, "learning_rate": 1.7643710870802507e-05, "loss": 0.2467, "step": 2355 }, { "epoch": 3.02, "learning_rate": 1.7586795674445077e-05, "loss": 0.2747, "step": 2360 }, { "epoch": 3.03, "learning_rate": 1.752988047808765e-05, "loss": 0.1656, "step": 2365 }, { "epoch": 3.03, "learning_rate": 1.7472965281730224e-05, "loss": 0.1659, "step": 2370 }, { "epoch": 3.04, "learning_rate": 1.7416050085372797e-05, "loss": 0.2871, "step": 2375 }, { "epoch": 3.05, "learning_rate": 1.7359134889015367e-05, "loss": 0.2369, "step": 2380 }, { "epoch": 3.05, "learning_rate": 1.730221969265794e-05, "loss": 0.2459, "step": 2385 }, { "epoch": 3.06, "learning_rate": 1.7245304496300514e-05, "loss": 0.1826, "step": 2390 }, { "epoch": 3.07, "learning_rate": 1.7188389299943087e-05, "loss": 0.2467, "step": 2395 }, { "epoch": 3.07, "learning_rate": 1.713147410358566e-05, "loss": 0.2196, "step": 2400 }, { "epoch": 3.08, "learning_rate": 1.707455890722823e-05, "loss": 0.2427, "step": 2405 }, { "epoch": 3.09, "learning_rate": 1.7017643710870804e-05, "loss": 0.2632, "step": 2410 }, { "epoch": 3.09, "learning_rate": 1.6960728514513377e-05, "loss": 0.1924, "step": 2415 }, { "epoch": 3.1, "learning_rate": 1.6903813318155947e-05, "loss": 0.2471, "step": 2420 }, { "epoch": 3.1, "learning_rate": 1.684689812179852e-05, "loss": 0.2159, "step": 2425 }, { "epoch": 3.11, "learning_rate": 1.6789982925441094e-05, "loss": 0.2591, "step": 2430 }, { "epoch": 3.12, "learning_rate": 1.6733067729083667e-05, "loss": 0.2674, "step": 2435 }, { "epoch": 3.12, "learning_rate": 1.667615253272624e-05, "loss": 0.2457, "step": 2440 }, { "epoch": 3.13, "learning_rate": 1.661923733636881e-05, "loss": 0.2419, "step": 2445 }, { "epoch": 3.14, "learning_rate": 1.6562322140011384e-05, "loss": 0.1977, "step": 2450 }, { "epoch": 3.14, "learning_rate": 1.6505406943653958e-05, "loss": 0.216, "step": 2455 }, { "epoch": 3.15, "learning_rate": 1.6448491747296528e-05, "loss": 0.2799, "step": 2460 }, { "epoch": 3.16, "learning_rate": 1.63915765509391e-05, "loss": 0.1789, "step": 2465 }, { "epoch": 3.16, "learning_rate": 1.6334661354581674e-05, "loss": 0.2677, "step": 2470 }, { "epoch": 3.17, "learning_rate": 1.6277746158224248e-05, "loss": 0.1893, "step": 2475 }, { "epoch": 3.18, "learning_rate": 1.622083096186682e-05, "loss": 0.1756, "step": 2480 }, { "epoch": 3.18, "learning_rate": 1.616391576550939e-05, "loss": 0.2038, "step": 2485 }, { "epoch": 3.19, "learning_rate": 1.6107000569151964e-05, "loss": 0.1776, "step": 2490 }, { "epoch": 3.19, "learning_rate": 1.6050085372794538e-05, "loss": 0.3071, "step": 2495 }, { "epoch": 3.2, "learning_rate": 1.5993170176437108e-05, "loss": 0.2819, "step": 2500 }, { "epoch": 3.21, "learning_rate": 1.593625498007968e-05, "loss": 0.2425, "step": 2505 }, { "epoch": 3.21, "learning_rate": 1.5879339783722255e-05, "loss": 0.2611, "step": 2510 }, { "epoch": 3.22, "learning_rate": 1.5822424587364828e-05, "loss": 0.1911, "step": 2515 }, { "epoch": 3.23, "learning_rate": 1.57655093910074e-05, "loss": 0.2089, "step": 2520 }, { "epoch": 3.23, "learning_rate": 1.5708594194649975e-05, "loss": 0.2004, "step": 2525 }, { "epoch": 3.24, "learning_rate": 1.5651678998292545e-05, "loss": 0.2162, "step": 2530 }, { "epoch": 3.25, "learning_rate": 1.5594763801935118e-05, "loss": 0.2117, "step": 2535 }, { "epoch": 3.25, "learning_rate": 1.553784860557769e-05, "loss": 0.2401, "step": 2540 }, { "epoch": 3.26, "learning_rate": 1.548093340922026e-05, "loss": 0.2056, "step": 2545 }, { "epoch": 3.26, "learning_rate": 1.5424018212862835e-05, "loss": 0.23, "step": 2550 }, { "epoch": 3.27, "learning_rate": 1.5367103016505408e-05, "loss": 0.2444, "step": 2555 }, { "epoch": 3.28, "learning_rate": 1.531018782014798e-05, "loss": 0.3034, "step": 2560 }, { "epoch": 3.28, "learning_rate": 1.5253272623790553e-05, "loss": 0.1683, "step": 2565 }, { "epoch": 3.29, "learning_rate": 1.5196357427433127e-05, "loss": 0.1557, "step": 2570 }, { "epoch": 3.3, "learning_rate": 1.5139442231075698e-05, "loss": 0.1504, "step": 2575 }, { "epoch": 3.3, "learning_rate": 1.508252703471827e-05, "loss": 0.2048, "step": 2580 }, { "epoch": 3.31, "learning_rate": 1.5025611838360843e-05, "loss": 0.2511, "step": 2585 }, { "epoch": 3.32, "learning_rate": 1.4968696642003415e-05, "loss": 0.204, "step": 2590 }, { "epoch": 3.32, "learning_rate": 1.491178144564599e-05, "loss": 0.2699, "step": 2595 }, { "epoch": 3.33, "learning_rate": 1.4854866249288562e-05, "loss": 0.1463, "step": 2600 }, { "epoch": 3.34, "learning_rate": 1.4797951052931133e-05, "loss": 0.1645, "step": 2605 }, { "epoch": 3.34, "learning_rate": 1.4741035856573707e-05, "loss": 0.1908, "step": 2610 }, { "epoch": 3.35, "learning_rate": 1.4684120660216279e-05, "loss": 0.2191, "step": 2615 }, { "epoch": 3.35, "learning_rate": 1.4627205463858852e-05, "loss": 0.2372, "step": 2620 }, { "epoch": 3.36, "learning_rate": 1.4570290267501424e-05, "loss": 0.2423, "step": 2625 }, { "epoch": 3.37, "learning_rate": 1.4513375071143995e-05, "loss": 0.141, "step": 2630 }, { "epoch": 3.37, "learning_rate": 1.4456459874786569e-05, "loss": 0.2345, "step": 2635 }, { "epoch": 3.38, "learning_rate": 1.4399544678429142e-05, "loss": 0.2508, "step": 2640 }, { "epoch": 3.39, "learning_rate": 1.4342629482071715e-05, "loss": 0.1987, "step": 2645 }, { "epoch": 3.39, "learning_rate": 1.4285714285714287e-05, "loss": 0.1662, "step": 2650 }, { "epoch": 3.4, "learning_rate": 1.4228799089356859e-05, "loss": 0.1729, "step": 2655 }, { "epoch": 3.41, "learning_rate": 1.4171883892999432e-05, "loss": 0.2623, "step": 2660 }, { "epoch": 3.41, "learning_rate": 1.4114968696642004e-05, "loss": 0.2242, "step": 2665 }, { "epoch": 3.42, "learning_rate": 1.4058053500284576e-05, "loss": 0.1906, "step": 2670 }, { "epoch": 3.42, "learning_rate": 1.4001138303927149e-05, "loss": 0.243, "step": 2675 }, { "epoch": 3.43, "learning_rate": 1.3944223107569724e-05, "loss": 0.2251, "step": 2680 }, { "epoch": 3.44, "learning_rate": 1.3887307911212296e-05, "loss": 0.2056, "step": 2685 }, { "epoch": 3.44, "learning_rate": 1.3830392714854867e-05, "loss": 0.2007, "step": 2690 }, { "epoch": 3.45, "learning_rate": 1.377347751849744e-05, "loss": 0.2273, "step": 2695 }, { "epoch": 3.46, "learning_rate": 1.3716562322140012e-05, "loss": 0.2652, "step": 2700 }, { "epoch": 3.46, "learning_rate": 1.3659647125782584e-05, "loss": 0.1765, "step": 2705 }, { "epoch": 3.47, "learning_rate": 1.3602731929425157e-05, "loss": 0.2289, "step": 2710 }, { "epoch": 3.48, "learning_rate": 1.354581673306773e-05, "loss": 0.2561, "step": 2715 }, { "epoch": 3.48, "learning_rate": 1.3488901536710304e-05, "loss": 0.2211, "step": 2720 }, { "epoch": 3.49, "learning_rate": 1.3431986340352876e-05, "loss": 0.1894, "step": 2725 }, { "epoch": 3.5, "learning_rate": 1.3375071143995448e-05, "loss": 0.1795, "step": 2730 }, { "epoch": 3.5, "learning_rate": 1.3318155947638021e-05, "loss": 0.1967, "step": 2735 }, { "epoch": 3.51, "learning_rate": 1.3261240751280593e-05, "loss": 0.2562, "step": 2740 }, { "epoch": 3.51, "learning_rate": 1.3204325554923166e-05, "loss": 0.2178, "step": 2745 }, { "epoch": 3.52, "learning_rate": 1.3147410358565738e-05, "loss": 0.1908, "step": 2750 }, { "epoch": 3.53, "learning_rate": 1.309049516220831e-05, "loss": 0.1789, "step": 2755 }, { "epoch": 3.53, "learning_rate": 1.3033579965850883e-05, "loss": 0.2742, "step": 2760 }, { "epoch": 3.54, "learning_rate": 1.2976664769493456e-05, "loss": 0.2605, "step": 2765 }, { "epoch": 3.55, "learning_rate": 1.291974957313603e-05, "loss": 0.2138, "step": 2770 }, { "epoch": 3.55, "learning_rate": 1.2862834376778601e-05, "loss": 0.1998, "step": 2775 }, { "epoch": 3.56, "learning_rate": 1.2805919180421173e-05, "loss": 0.2454, "step": 2780 }, { "epoch": 3.57, "learning_rate": 1.2749003984063746e-05, "loss": 0.2261, "step": 2785 }, { "epoch": 3.57, "learning_rate": 1.2692088787706318e-05, "loss": 0.1907, "step": 2790 }, { "epoch": 3.58, "learning_rate": 1.263517359134889e-05, "loss": 0.1849, "step": 2795 }, { "epoch": 3.58, "learning_rate": 1.2578258394991463e-05, "loss": 0.2552, "step": 2800 }, { "epoch": 3.59, "learning_rate": 1.2521343198634036e-05, "loss": 0.186, "step": 2805 }, { "epoch": 3.6, "learning_rate": 1.246442800227661e-05, "loss": 0.1753, "step": 2810 }, { "epoch": 3.6, "learning_rate": 1.2407512805919181e-05, "loss": 0.188, "step": 2815 }, { "epoch": 3.61, "learning_rate": 1.2350597609561755e-05, "loss": 0.2063, "step": 2820 }, { "epoch": 3.62, "learning_rate": 1.2293682413204327e-05, "loss": 0.1565, "step": 2825 }, { "epoch": 3.62, "learning_rate": 1.2236767216846898e-05, "loss": 0.185, "step": 2830 }, { "epoch": 3.63, "learning_rate": 1.2179852020489472e-05, "loss": 0.2167, "step": 2835 }, { "epoch": 3.64, "learning_rate": 1.2122936824132043e-05, "loss": 0.1588, "step": 2840 }, { "epoch": 3.64, "learning_rate": 1.2066021627774618e-05, "loss": 0.2709, "step": 2845 }, { "epoch": 3.65, "learning_rate": 1.200910643141719e-05, "loss": 0.2097, "step": 2850 }, { "epoch": 3.66, "learning_rate": 1.1952191235059762e-05, "loss": 0.1697, "step": 2855 }, { "epoch": 3.66, "learning_rate": 1.1895276038702335e-05, "loss": 0.2032, "step": 2860 }, { "epoch": 3.67, "learning_rate": 1.1838360842344907e-05, "loss": 0.2468, "step": 2865 }, { "epoch": 3.67, "learning_rate": 1.1781445645987478e-05, "loss": 0.1398, "step": 2870 }, { "epoch": 3.68, "learning_rate": 1.1724530449630052e-05, "loss": 0.2453, "step": 2875 }, { "epoch": 3.69, "learning_rate": 1.1667615253272624e-05, "loss": 0.2397, "step": 2880 }, { "epoch": 3.69, "learning_rate": 1.1610700056915199e-05, "loss": 0.1835, "step": 2885 }, { "epoch": 3.7, "learning_rate": 1.155378486055777e-05, "loss": 0.2497, "step": 2890 }, { "epoch": 3.71, "learning_rate": 1.1496869664200344e-05, "loss": 0.1499, "step": 2895 }, { "epoch": 3.71, "learning_rate": 1.1439954467842915e-05, "loss": 0.2455, "step": 2900 }, { "epoch": 3.72, "learning_rate": 1.1383039271485487e-05, "loss": 0.2016, "step": 2905 }, { "epoch": 3.73, "learning_rate": 1.132612407512806e-05, "loss": 0.2249, "step": 2910 }, { "epoch": 3.73, "learning_rate": 1.1269208878770632e-05, "loss": 0.1286, "step": 2915 }, { "epoch": 3.74, "learning_rate": 1.1212293682413204e-05, "loss": 0.2297, "step": 2920 }, { "epoch": 3.74, "learning_rate": 1.1155378486055777e-05, "loss": 0.1435, "step": 2925 }, { "epoch": 3.75, "learning_rate": 1.109846328969835e-05, "loss": 0.1694, "step": 2930 }, { "epoch": 3.76, "learning_rate": 1.1041548093340924e-05, "loss": 0.2167, "step": 2935 }, { "epoch": 3.76, "learning_rate": 1.0984632896983496e-05, "loss": 0.1979, "step": 2940 }, { "epoch": 3.77, "learning_rate": 1.0927717700626069e-05, "loss": 0.1548, "step": 2945 }, { "epoch": 3.78, "learning_rate": 1.087080250426864e-05, "loss": 0.2188, "step": 2950 }, { "epoch": 3.78, "learning_rate": 1.0813887307911212e-05, "loss": 0.2313, "step": 2955 }, { "epoch": 3.79, "learning_rate": 1.0756972111553786e-05, "loss": 0.2211, "step": 2960 }, { "epoch": 3.8, "learning_rate": 1.0700056915196357e-05, "loss": 0.1612, "step": 2965 }, { "epoch": 3.8, "learning_rate": 1.0643141718838932e-05, "loss": 0.2125, "step": 2970 }, { "epoch": 3.81, "learning_rate": 1.0586226522481504e-05, "loss": 0.206, "step": 2975 }, { "epoch": 3.82, "learning_rate": 1.0529311326124076e-05, "loss": 0.2112, "step": 2980 }, { "epoch": 3.82, "learning_rate": 1.047239612976665e-05, "loss": 0.1762, "step": 2985 }, { "epoch": 3.83, "learning_rate": 1.0415480933409221e-05, "loss": 0.169, "step": 2990 }, { "epoch": 3.83, "learning_rate": 1.0358565737051793e-05, "loss": 0.2013, "step": 2995 }, { "epoch": 3.84, "learning_rate": 1.0301650540694366e-05, "loss": 0.1734, "step": 3000 }, { "epoch": 3.85, "learning_rate": 1.0244735344336938e-05, "loss": 0.215, "step": 3005 }, { "epoch": 3.85, "learning_rate": 1.0187820147979513e-05, "loss": 0.2166, "step": 3010 }, { "epoch": 3.86, "learning_rate": 1.0130904951622084e-05, "loss": 0.2166, "step": 3015 }, { "epoch": 3.87, "learning_rate": 1.0073989755264658e-05, "loss": 0.1942, "step": 3020 }, { "epoch": 3.87, "learning_rate": 1.001707455890723e-05, "loss": 0.1952, "step": 3025 }, { "epoch": 3.88, "learning_rate": 9.960159362549801e-06, "loss": 0.1974, "step": 3030 }, { "epoch": 3.89, "learning_rate": 9.903244166192375e-06, "loss": 0.2231, "step": 3035 }, { "epoch": 3.89, "learning_rate": 9.846328969834946e-06, "loss": 0.2053, "step": 3040 }, { "epoch": 3.9, "learning_rate": 9.78941377347752e-06, "loss": 0.1894, "step": 3045 }, { "epoch": 3.9, "learning_rate": 9.732498577120091e-06, "loss": 0.2454, "step": 3050 }, { "epoch": 3.91, "learning_rate": 9.675583380762665e-06, "loss": 0.1853, "step": 3055 }, { "epoch": 3.92, "learning_rate": 9.618668184405236e-06, "loss": 0.2468, "step": 3060 }, { "epoch": 3.92, "learning_rate": 9.56175298804781e-06, "loss": 0.1915, "step": 3065 }, { "epoch": 3.93, "learning_rate": 9.504837791690381e-06, "loss": 0.2251, "step": 3070 }, { "epoch": 3.94, "learning_rate": 9.447922595332955e-06, "loss": 0.1638, "step": 3075 }, { "epoch": 3.94, "learning_rate": 9.391007398975526e-06, "loss": 0.16, "step": 3080 }, { "epoch": 3.95, "learning_rate": 9.3340922026181e-06, "loss": 0.1759, "step": 3085 }, { "epoch": 3.96, "learning_rate": 9.277177006260673e-06, "loss": 0.2079, "step": 3090 }, { "epoch": 3.96, "learning_rate": 9.220261809903245e-06, "loss": 0.22, "step": 3095 }, { "epoch": 3.97, "learning_rate": 9.163346613545817e-06, "loss": 0.2352, "step": 3100 }, { "epoch": 3.98, "learning_rate": 9.10643141718839e-06, "loss": 0.1975, "step": 3105 }, { "epoch": 3.98, "learning_rate": 9.049516220830963e-06, "loss": 0.2027, "step": 3110 }, { "epoch": 3.99, "learning_rate": 8.992601024473535e-06, "loss": 0.1782, "step": 3115 }, { "epoch": 3.99, "learning_rate": 8.935685828116107e-06, "loss": 0.2234, "step": 3120 }, { "epoch": 4.0, "eval_accuracy": 0.8679, "eval_loss": 0.43295106291770935, "eval_runtime": 60.931, "eval_samples_per_second": 164.12, "eval_steps_per_second": 10.257, "step": 3124 }, { "epoch": 4.0, "learning_rate": 8.87877063175868e-06, "loss": 0.1934, "step": 3125 }, { "epoch": 4.01, "learning_rate": 8.821855435401253e-06, "loss": 0.1317, "step": 3130 }, { "epoch": 4.01, "learning_rate": 8.764940239043825e-06, "loss": 0.2049, "step": 3135 }, { "epoch": 4.02, "learning_rate": 8.708025042686399e-06, "loss": 0.1348, "step": 3140 }, { "epoch": 4.03, "learning_rate": 8.65110984632897e-06, "loss": 0.1759, "step": 3145 }, { "epoch": 4.03, "learning_rate": 8.594194649971544e-06, "loss": 0.1538, "step": 3150 }, { "epoch": 4.04, "learning_rate": 8.537279453614115e-06, "loss": 0.1096, "step": 3155 }, { "epoch": 4.05, "learning_rate": 8.480364257256689e-06, "loss": 0.1689, "step": 3160 }, { "epoch": 4.05, "learning_rate": 8.42344906089926e-06, "loss": 0.1647, "step": 3165 }, { "epoch": 4.06, "learning_rate": 8.366533864541834e-06, "loss": 0.1881, "step": 3170 }, { "epoch": 4.07, "learning_rate": 8.309618668184405e-06, "loss": 0.1345, "step": 3175 }, { "epoch": 4.07, "learning_rate": 8.252703471826979e-06, "loss": 0.134, "step": 3180 }, { "epoch": 4.08, "learning_rate": 8.19578827546955e-06, "loss": 0.1413, "step": 3185 }, { "epoch": 4.08, "learning_rate": 8.138873079112124e-06, "loss": 0.1382, "step": 3190 }, { "epoch": 4.09, "learning_rate": 8.081957882754696e-06, "loss": 0.1666, "step": 3195 }, { "epoch": 4.1, "learning_rate": 8.025042686397269e-06, "loss": 0.1115, "step": 3200 }, { "epoch": 4.1, "learning_rate": 7.96812749003984e-06, "loss": 0.1575, "step": 3205 }, { "epoch": 4.11, "learning_rate": 7.911212293682414e-06, "loss": 0.1469, "step": 3210 }, { "epoch": 4.12, "learning_rate": 7.854297097324987e-06, "loss": 0.1367, "step": 3215 }, { "epoch": 4.12, "learning_rate": 7.797381900967559e-06, "loss": 0.1432, "step": 3220 }, { "epoch": 4.13, "learning_rate": 7.74046670461013e-06, "loss": 0.1375, "step": 3225 }, { "epoch": 4.14, "learning_rate": 7.683551508252704e-06, "loss": 0.1574, "step": 3230 }, { "epoch": 4.14, "learning_rate": 7.626636311895277e-06, "loss": 0.1289, "step": 3235 }, { "epoch": 4.15, "learning_rate": 7.569721115537849e-06, "loss": 0.1744, "step": 3240 }, { "epoch": 4.15, "learning_rate": 7.512805919180422e-06, "loss": 0.1705, "step": 3245 }, { "epoch": 4.16, "learning_rate": 7.455890722822995e-06, "loss": 0.1719, "step": 3250 }, { "epoch": 4.17, "learning_rate": 7.398975526465567e-06, "loss": 0.1454, "step": 3255 }, { "epoch": 4.17, "learning_rate": 7.342060330108139e-06, "loss": 0.1038, "step": 3260 }, { "epoch": 4.18, "learning_rate": 7.285145133750712e-06, "loss": 0.1544, "step": 3265 }, { "epoch": 4.19, "learning_rate": 7.228229937393284e-06, "loss": 0.1299, "step": 3270 }, { "epoch": 4.19, "learning_rate": 7.171314741035858e-06, "loss": 0.1465, "step": 3275 }, { "epoch": 4.2, "learning_rate": 7.114399544678429e-06, "loss": 0.1236, "step": 3280 }, { "epoch": 4.21, "learning_rate": 7.057484348321002e-06, "loss": 0.1329, "step": 3285 }, { "epoch": 4.21, "learning_rate": 7.0005691519635745e-06, "loss": 0.1716, "step": 3290 }, { "epoch": 4.22, "learning_rate": 6.943653955606148e-06, "loss": 0.1225, "step": 3295 }, { "epoch": 4.23, "learning_rate": 6.88673875924872e-06, "loss": 0.1124, "step": 3300 }, { "epoch": 4.23, "learning_rate": 6.829823562891292e-06, "loss": 0.1425, "step": 3305 }, { "epoch": 4.24, "learning_rate": 6.772908366533865e-06, "loss": 0.1209, "step": 3310 }, { "epoch": 4.24, "learning_rate": 6.715993170176438e-06, "loss": 0.1293, "step": 3315 }, { "epoch": 4.25, "learning_rate": 6.6590779738190105e-06, "loss": 0.1592, "step": 3320 }, { "epoch": 4.26, "learning_rate": 6.602162777461583e-06, "loss": 0.1525, "step": 3325 }, { "epoch": 4.26, "learning_rate": 6.545247581104155e-06, "loss": 0.1993, "step": 3330 }, { "epoch": 4.27, "learning_rate": 6.488332384746728e-06, "loss": 0.1312, "step": 3335 }, { "epoch": 4.28, "learning_rate": 6.431417188389301e-06, "loss": 0.1318, "step": 3340 }, { "epoch": 4.28, "learning_rate": 6.374501992031873e-06, "loss": 0.1576, "step": 3345 }, { "epoch": 4.29, "learning_rate": 6.317586795674445e-06, "loss": 0.1417, "step": 3350 }, { "epoch": 4.3, "learning_rate": 6.260671599317018e-06, "loss": 0.1696, "step": 3355 }, { "epoch": 4.3, "learning_rate": 6.203756402959591e-06, "loss": 0.1711, "step": 3360 }, { "epoch": 4.31, "learning_rate": 6.146841206602163e-06, "loss": 0.187, "step": 3365 }, { "epoch": 4.31, "learning_rate": 6.089926010244736e-06, "loss": 0.1165, "step": 3370 }, { "epoch": 4.32, "learning_rate": 6.033010813887309e-06, "loss": 0.1282, "step": 3375 }, { "epoch": 4.33, "learning_rate": 5.976095617529881e-06, "loss": 0.1528, "step": 3380 }, { "epoch": 4.33, "learning_rate": 5.919180421172453e-06, "loss": 0.1477, "step": 3385 }, { "epoch": 4.34, "learning_rate": 5.862265224815026e-06, "loss": 0.1666, "step": 3390 }, { "epoch": 4.35, "learning_rate": 5.805350028457599e-06, "loss": 0.1881, "step": 3395 }, { "epoch": 4.35, "learning_rate": 5.748434832100172e-06, "loss": 0.1377, "step": 3400 }, { "epoch": 4.36, "learning_rate": 5.6915196357427435e-06, "loss": 0.16, "step": 3405 }, { "epoch": 4.37, "learning_rate": 5.634604439385316e-06, "loss": 0.1488, "step": 3410 }, { "epoch": 4.37, "learning_rate": 5.577689243027889e-06, "loss": 0.143, "step": 3415 }, { "epoch": 4.38, "learning_rate": 5.520774046670462e-06, "loss": 0.1414, "step": 3420 }, { "epoch": 4.39, "learning_rate": 5.4638588503130345e-06, "loss": 0.1979, "step": 3425 }, { "epoch": 4.39, "learning_rate": 5.406943653955606e-06, "loss": 0.1419, "step": 3430 }, { "epoch": 4.4, "learning_rate": 5.350028457598179e-06, "loss": 0.1216, "step": 3435 }, { "epoch": 4.4, "learning_rate": 5.293113261240752e-06, "loss": 0.1433, "step": 3440 }, { "epoch": 4.41, "learning_rate": 5.236198064883325e-06, "loss": 0.1615, "step": 3445 }, { "epoch": 4.42, "learning_rate": 5.179282868525896e-06, "loss": 0.1652, "step": 3450 }, { "epoch": 4.42, "learning_rate": 5.122367672168469e-06, "loss": 0.2001, "step": 3455 }, { "epoch": 4.43, "learning_rate": 5.065452475811042e-06, "loss": 0.1397, "step": 3460 }, { "epoch": 4.44, "learning_rate": 5.008537279453615e-06, "loss": 0.1599, "step": 3465 }, { "epoch": 4.44, "learning_rate": 4.951622083096187e-06, "loss": 0.1167, "step": 3470 }, { "epoch": 4.45, "learning_rate": 4.89470688673876e-06, "loss": 0.1473, "step": 3475 }, { "epoch": 4.46, "learning_rate": 4.837791690381332e-06, "loss": 0.1486, "step": 3480 }, { "epoch": 4.46, "learning_rate": 4.780876494023905e-06, "loss": 0.1604, "step": 3485 }, { "epoch": 4.47, "learning_rate": 4.723961297666477e-06, "loss": 0.1364, "step": 3490 }, { "epoch": 4.47, "learning_rate": 4.66704610130905e-06, "loss": 0.183, "step": 3495 }, { "epoch": 4.48, "learning_rate": 4.6101309049516225e-06, "loss": 0.1803, "step": 3500 }, { "epoch": 4.49, "learning_rate": 4.553215708594195e-06, "loss": 0.1405, "step": 3505 }, { "epoch": 4.49, "learning_rate": 4.4963005122367675e-06, "loss": 0.1436, "step": 3510 }, { "epoch": 4.5, "learning_rate": 4.43938531587934e-06, "loss": 0.1442, "step": 3515 }, { "epoch": 4.51, "learning_rate": 4.382470119521913e-06, "loss": 0.0979, "step": 3520 }, { "epoch": 4.51, "learning_rate": 4.325554923164485e-06, "loss": 0.1218, "step": 3525 }, { "epoch": 4.52, "learning_rate": 4.268639726807058e-06, "loss": 0.1965, "step": 3530 }, { "epoch": 4.53, "learning_rate": 4.21172453044963e-06, "loss": 0.161, "step": 3535 }, { "epoch": 4.53, "learning_rate": 4.154809334092203e-06, "loss": 0.1378, "step": 3540 }, { "epoch": 4.54, "learning_rate": 4.097894137734775e-06, "loss": 0.1333, "step": 3545 }, { "epoch": 4.55, "learning_rate": 4.040978941377348e-06, "loss": 0.137, "step": 3550 }, { "epoch": 4.55, "learning_rate": 3.98406374501992e-06, "loss": 0.1652, "step": 3555 }, { "epoch": 4.56, "learning_rate": 3.927148548662494e-06, "loss": 0.1655, "step": 3560 }, { "epoch": 4.56, "learning_rate": 3.870233352305065e-06, "loss": 0.1791, "step": 3565 }, { "epoch": 4.57, "learning_rate": 3.8133181559476383e-06, "loss": 0.128, "step": 3570 }, { "epoch": 4.58, "learning_rate": 3.756402959590211e-06, "loss": 0.1567, "step": 3575 }, { "epoch": 4.58, "learning_rate": 3.6994877632327834e-06, "loss": 0.1564, "step": 3580 }, { "epoch": 4.59, "learning_rate": 3.642572566875356e-06, "loss": 0.1369, "step": 3585 }, { "epoch": 4.6, "learning_rate": 3.585657370517929e-06, "loss": 0.176, "step": 3590 }, { "epoch": 4.6, "learning_rate": 3.528742174160501e-06, "loss": 0.1456, "step": 3595 }, { "epoch": 4.61, "learning_rate": 3.471826977803074e-06, "loss": 0.1346, "step": 3600 }, { "epoch": 4.62, "learning_rate": 3.414911781445646e-06, "loss": 0.1565, "step": 3605 }, { "epoch": 4.62, "learning_rate": 3.357996585088219e-06, "loss": 0.1792, "step": 3610 }, { "epoch": 4.63, "learning_rate": 3.3010813887307915e-06, "loss": 0.1411, "step": 3615 }, { "epoch": 4.63, "learning_rate": 3.244166192373364e-06, "loss": 0.1847, "step": 3620 }, { "epoch": 4.64, "learning_rate": 3.1872509960159366e-06, "loss": 0.19, "step": 3625 }, { "epoch": 4.65, "learning_rate": 3.130335799658509e-06, "loss": 0.1474, "step": 3630 }, { "epoch": 4.65, "learning_rate": 3.0734206033010816e-06, "loss": 0.1869, "step": 3635 }, { "epoch": 4.66, "learning_rate": 3.0165054069436546e-06, "loss": 0.1128, "step": 3640 }, { "epoch": 4.67, "learning_rate": 2.9595902105862267e-06, "loss": 0.1203, "step": 3645 }, { "epoch": 4.67, "learning_rate": 2.9026750142287997e-06, "loss": 0.1286, "step": 3650 }, { "epoch": 4.68, "learning_rate": 2.8457598178713718e-06, "loss": 0.12, "step": 3655 }, { "epoch": 4.69, "learning_rate": 2.7888446215139443e-06, "loss": 0.095, "step": 3660 }, { "epoch": 4.69, "learning_rate": 2.7319294251565172e-06, "loss": 0.1002, "step": 3665 }, { "epoch": 4.7, "learning_rate": 2.6750142287990894e-06, "loss": 0.1256, "step": 3670 }, { "epoch": 4.71, "learning_rate": 2.6180990324416623e-06, "loss": 0.1148, "step": 3675 }, { "epoch": 4.71, "learning_rate": 2.5611838360842344e-06, "loss": 0.1646, "step": 3680 }, { "epoch": 4.72, "learning_rate": 2.5042686397268074e-06, "loss": 0.1083, "step": 3685 }, { "epoch": 4.72, "learning_rate": 2.44735344336938e-06, "loss": 0.1251, "step": 3690 }, { "epoch": 4.73, "learning_rate": 2.3904382470119524e-06, "loss": 0.1242, "step": 3695 }, { "epoch": 4.74, "learning_rate": 2.333523050654525e-06, "loss": 0.1671, "step": 3700 }, { "epoch": 4.74, "learning_rate": 2.2766078542970975e-06, "loss": 0.1442, "step": 3705 }, { "epoch": 4.75, "learning_rate": 2.21969265793967e-06, "loss": 0.1694, "step": 3710 }, { "epoch": 4.76, "learning_rate": 2.1627774615822426e-06, "loss": 0.1632, "step": 3715 }, { "epoch": 4.76, "learning_rate": 2.105862265224815e-06, "loss": 0.1738, "step": 3720 }, { "epoch": 4.77, "learning_rate": 2.0489470688673876e-06, "loss": 0.1062, "step": 3725 }, { "epoch": 4.78, "learning_rate": 1.99203187250996e-06, "loss": 0.1395, "step": 3730 }, { "epoch": 4.78, "learning_rate": 1.9351166761525327e-06, "loss": 0.1321, "step": 3735 }, { "epoch": 4.79, "learning_rate": 1.8782014797951054e-06, "loss": 0.1251, "step": 3740 }, { "epoch": 4.79, "learning_rate": 1.821286283437678e-06, "loss": 0.1499, "step": 3745 }, { "epoch": 4.8, "learning_rate": 1.7643710870802505e-06, "loss": 0.1338, "step": 3750 }, { "epoch": 4.81, "learning_rate": 1.707455890722823e-06, "loss": 0.1873, "step": 3755 }, { "epoch": 4.81, "learning_rate": 1.6505406943653958e-06, "loss": 0.1352, "step": 3760 }, { "epoch": 4.82, "learning_rate": 1.5936254980079683e-06, "loss": 0.1673, "step": 3765 }, { "epoch": 4.83, "learning_rate": 1.5367103016505408e-06, "loss": 0.1705, "step": 3770 }, { "epoch": 4.83, "learning_rate": 1.4797951052931133e-06, "loss": 0.1145, "step": 3775 }, { "epoch": 4.84, "learning_rate": 1.4228799089356859e-06, "loss": 0.0999, "step": 3780 }, { "epoch": 4.85, "learning_rate": 1.3659647125782586e-06, "loss": 0.1012, "step": 3785 }, { "epoch": 4.85, "learning_rate": 1.3090495162208312e-06, "loss": 0.1677, "step": 3790 }, { "epoch": 4.86, "learning_rate": 1.2521343198634037e-06, "loss": 0.169, "step": 3795 }, { "epoch": 4.87, "learning_rate": 1.1952191235059762e-06, "loss": 0.1844, "step": 3800 }, { "epoch": 4.87, "learning_rate": 1.1383039271485487e-06, "loss": 0.1701, "step": 3805 }, { "epoch": 4.88, "learning_rate": 1.0813887307911213e-06, "loss": 0.1319, "step": 3810 }, { "epoch": 4.88, "learning_rate": 1.0244735344336938e-06, "loss": 0.1546, "step": 3815 }, { "epoch": 4.89, "learning_rate": 9.675583380762663e-07, "loss": 0.1579, "step": 3820 }, { "epoch": 4.9, "learning_rate": 9.10643141718839e-07, "loss": 0.1308, "step": 3825 }, { "epoch": 4.9, "learning_rate": 8.537279453614115e-07, "loss": 0.1453, "step": 3830 }, { "epoch": 4.91, "learning_rate": 7.968127490039841e-07, "loss": 0.169, "step": 3835 }, { "epoch": 4.92, "learning_rate": 7.398975526465567e-07, "loss": 0.1191, "step": 3840 }, { "epoch": 4.92, "learning_rate": 6.829823562891293e-07, "loss": 0.152, "step": 3845 }, { "epoch": 4.93, "learning_rate": 6.260671599317018e-07, "loss": 0.1275, "step": 3850 }, { "epoch": 4.94, "learning_rate": 5.691519635742744e-07, "loss": 0.1409, "step": 3855 }, { "epoch": 4.94, "learning_rate": 5.122367672168469e-07, "loss": 0.1332, "step": 3860 }, { "epoch": 4.95, "learning_rate": 4.553215708594195e-07, "loss": 0.163, "step": 3865 }, { "epoch": 4.95, "learning_rate": 3.9840637450199207e-07, "loss": 0.1564, "step": 3870 }, { "epoch": 4.96, "learning_rate": 3.4149117814456466e-07, "loss": 0.2202, "step": 3875 }, { "epoch": 4.97, "learning_rate": 2.845759817871372e-07, "loss": 0.1769, "step": 3880 }, { "epoch": 4.97, "learning_rate": 2.2766078542970974e-07, "loss": 0.1343, "step": 3885 }, { "epoch": 4.98, "learning_rate": 1.7074558907228233e-07, "loss": 0.1481, "step": 3890 }, { "epoch": 4.99, "learning_rate": 1.1383039271485487e-07, "loss": 0.1591, "step": 3895 }, { "epoch": 4.99, "learning_rate": 5.6915196357427436e-08, "loss": 0.1983, "step": 3900 }, { "epoch": 5.0, "learning_rate": 0.0, "loss": 0.121, "step": 3905 }, { "epoch": 5.0, "eval_accuracy": 0.8735, "eval_loss": 0.42226287722587585, "eval_runtime": 60.1747, "eval_samples_per_second": 166.183, "eval_steps_per_second": 10.386, "step": 3905 }, { "epoch": 5.0, "step": 3905, "total_flos": 6.230614598311477e+18, "train_loss": 0.0, "train_runtime": 0.1997, "train_samples_per_second": 1252007.126, "train_steps_per_second": 19556.351 } ], "max_steps": 3905, "num_train_epochs": 5, "total_flos": 6.230614598311477e+18, "trial_name": null, "trial_params": null }