diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,4756 @@ +{ + "best_metric": 0.8735, + "best_model_checkpoint": "swin-tiny-finetuned-cifar100/checkpoint-3905", + "epoch": 4.99968, + "global_step": 3905, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 5.115089514066497e-07, + "loss": 4.6445, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 1.0230179028132994e-06, + "loss": 4.6363, + "step": 10 + }, + { + "epoch": 0.02, + "learning_rate": 1.534526854219949e-06, + "loss": 4.6361, + "step": 15 + }, + { + "epoch": 0.03, + "learning_rate": 2.0460358056265987e-06, + "loss": 4.6511, + "step": 20 + }, + { + "epoch": 0.03, + "learning_rate": 2.5575447570332483e-06, + "loss": 4.6596, + "step": 25 + }, + { + "epoch": 0.04, + "learning_rate": 3.069053708439898e-06, + "loss": 4.6345, + "step": 30 + }, + { + "epoch": 0.04, + "learning_rate": 3.5805626598465474e-06, + "loss": 4.6177, + "step": 35 + }, + { + "epoch": 0.05, + "learning_rate": 4.092071611253197e-06, + "loss": 4.6046, + "step": 40 + }, + { + "epoch": 0.06, + "learning_rate": 4.603580562659847e-06, + "loss": 4.6372, + "step": 45 + }, + { + "epoch": 0.06, + "learning_rate": 5.1150895140664966e-06, + "loss": 4.5815, + "step": 50 + }, + { + "epoch": 0.07, + "learning_rate": 5.626598465473146e-06, + "loss": 4.5585, + "step": 55 + }, + { + "epoch": 0.08, + "learning_rate": 6.138107416879796e-06, + "loss": 4.6204, + "step": 60 + }, + { + "epoch": 0.08, + "learning_rate": 6.649616368286445e-06, + "loss": 4.5496, + "step": 65 + }, + { + "epoch": 0.09, + "learning_rate": 7.161125319693095e-06, + "loss": 4.5638, + "step": 70 + }, + { + "epoch": 0.1, + "learning_rate": 7.672634271099745e-06, + "loss": 4.5451, + "step": 75 + }, + { + "epoch": 0.1, + "learning_rate": 8.184143222506395e-06, + "loss": 4.5389, + "step": 80 + }, + { + "epoch": 0.11, + "learning_rate": 8.695652173913044e-06, + "loss": 4.4942, + "step": 85 + }, + { + "epoch": 0.12, + "learning_rate": 9.207161125319694e-06, + "loss": 4.4929, + "step": 90 + }, + { + "epoch": 0.12, + "learning_rate": 9.718670076726344e-06, + "loss": 4.4607, + "step": 95 + }, + { + "epoch": 0.13, + "learning_rate": 1.0230179028132993e-05, + "loss": 4.4665, + "step": 100 + }, + { + "epoch": 0.13, + "learning_rate": 1.0741687979539643e-05, + "loss": 4.438, + "step": 105 + }, + { + "epoch": 0.14, + "learning_rate": 1.1253196930946292e-05, + "loss": 4.415, + "step": 110 + }, + { + "epoch": 0.15, + "learning_rate": 1.1764705882352942e-05, + "loss": 4.3831, + "step": 115 + }, + { + "epoch": 0.15, + "learning_rate": 1.2276214833759591e-05, + "loss": 4.3877, + "step": 120 + }, + { + "epoch": 0.16, + "learning_rate": 1.2787723785166241e-05, + "loss": 4.3453, + "step": 125 + }, + { + "epoch": 0.17, + "learning_rate": 1.329923273657289e-05, + "loss": 4.3025, + "step": 130 + }, + { + "epoch": 0.17, + "learning_rate": 1.381074168797954e-05, + "loss": 4.2795, + "step": 135 + }, + { + "epoch": 0.18, + "learning_rate": 1.432225063938619e-05, + "loss": 4.2353, + "step": 140 + }, + { + "epoch": 0.19, + "learning_rate": 1.483375959079284e-05, + "loss": 4.1786, + "step": 145 + }, + { + "epoch": 0.19, + "learning_rate": 1.534526854219949e-05, + "loss": 4.1939, + "step": 150 + }, + { + "epoch": 0.2, + "learning_rate": 1.585677749360614e-05, + "loss": 4.0723, + "step": 155 + }, + { + "epoch": 0.2, + "learning_rate": 1.636828644501279e-05, + "loss": 4.0102, + "step": 160 + }, + { + "epoch": 0.21, + "learning_rate": 1.687979539641944e-05, + "loss": 3.9292, + "step": 165 + }, + { + "epoch": 0.22, + "learning_rate": 1.739130434782609e-05, + "loss": 3.8505, + "step": 170 + }, + { + "epoch": 0.22, + "learning_rate": 1.790281329923274e-05, + "loss": 3.7539, + "step": 175 + }, + { + "epoch": 0.23, + "learning_rate": 1.8414322250639388e-05, + "loss": 3.6833, + "step": 180 + }, + { + "epoch": 0.24, + "learning_rate": 1.8925831202046038e-05, + "loss": 3.5281, + "step": 185 + }, + { + "epoch": 0.24, + "learning_rate": 1.9437340153452687e-05, + "loss": 3.4329, + "step": 190 + }, + { + "epoch": 0.25, + "learning_rate": 1.9948849104859337e-05, + "loss": 3.3203, + "step": 195 + }, + { + "epoch": 0.26, + "learning_rate": 2.0460358056265986e-05, + "loss": 3.1954, + "step": 200 + }, + { + "epoch": 0.26, + "learning_rate": 2.0971867007672636e-05, + "loss": 3.0686, + "step": 205 + }, + { + "epoch": 0.27, + "learning_rate": 2.1483375959079285e-05, + "loss": 2.9424, + "step": 210 + }, + { + "epoch": 0.28, + "learning_rate": 2.1994884910485935e-05, + "loss": 3.0176, + "step": 215 + }, + { + "epoch": 0.28, + "learning_rate": 2.2506393861892585e-05, + "loss": 2.8912, + "step": 220 + }, + { + "epoch": 0.29, + "learning_rate": 2.3017902813299234e-05, + "loss": 2.7802, + "step": 225 + }, + { + "epoch": 0.29, + "learning_rate": 2.3529411764705884e-05, + "loss": 2.7013, + "step": 230 + }, + { + "epoch": 0.3, + "learning_rate": 2.4040920716112533e-05, + "loss": 2.6416, + "step": 235 + }, + { + "epoch": 0.31, + "learning_rate": 2.4552429667519183e-05, + "loss": 2.5482, + "step": 240 + }, + { + "epoch": 0.31, + "learning_rate": 2.5063938618925832e-05, + "loss": 2.3947, + "step": 245 + }, + { + "epoch": 0.32, + "learning_rate": 2.5575447570332482e-05, + "loss": 2.2859, + "step": 250 + }, + { + "epoch": 0.33, + "learning_rate": 2.608695652173913e-05, + "loss": 2.2609, + "step": 255 + }, + { + "epoch": 0.33, + "learning_rate": 2.659846547314578e-05, + "loss": 2.2065, + "step": 260 + }, + { + "epoch": 0.34, + "learning_rate": 2.710997442455243e-05, + "loss": 2.1006, + "step": 265 + }, + { + "epoch": 0.35, + "learning_rate": 2.762148337595908e-05, + "loss": 1.9752, + "step": 270 + }, + { + "epoch": 0.35, + "learning_rate": 2.813299232736573e-05, + "loss": 2.0412, + "step": 275 + }, + { + "epoch": 0.36, + "learning_rate": 2.864450127877238e-05, + "loss": 2.0338, + "step": 280 + }, + { + "epoch": 0.36, + "learning_rate": 2.915601023017903e-05, + "loss": 1.8349, + "step": 285 + }, + { + "epoch": 0.37, + "learning_rate": 2.966751918158568e-05, + "loss": 1.7146, + "step": 290 + }, + { + "epoch": 0.38, + "learning_rate": 3.0179028132992328e-05, + "loss": 1.7222, + "step": 295 + }, + { + "epoch": 0.38, + "learning_rate": 3.069053708439898e-05, + "loss": 1.8153, + "step": 300 + }, + { + "epoch": 0.39, + "learning_rate": 3.120204603580563e-05, + "loss": 1.6485, + "step": 305 + }, + { + "epoch": 0.4, + "learning_rate": 3.171355498721228e-05, + "loss": 1.485, + "step": 310 + }, + { + "epoch": 0.4, + "learning_rate": 3.222506393861893e-05, + "loss": 1.7435, + "step": 315 + }, + { + "epoch": 0.41, + "learning_rate": 3.273657289002558e-05, + "loss": 1.6707, + "step": 320 + }, + { + "epoch": 0.42, + "learning_rate": 3.324808184143223e-05, + "loss": 1.5172, + "step": 325 + }, + { + "epoch": 0.42, + "learning_rate": 3.375959079283888e-05, + "loss": 1.538, + "step": 330 + }, + { + "epoch": 0.43, + "learning_rate": 3.427109974424553e-05, + "loss": 1.424, + "step": 335 + }, + { + "epoch": 0.44, + "learning_rate": 3.478260869565218e-05, + "loss": 1.3758, + "step": 340 + }, + { + "epoch": 0.44, + "learning_rate": 3.529411764705883e-05, + "loss": 1.3251, + "step": 345 + }, + { + "epoch": 0.45, + "learning_rate": 3.580562659846548e-05, + "loss": 1.3147, + "step": 350 + }, + { + "epoch": 0.45, + "learning_rate": 3.6317135549872126e-05, + "loss": 1.3606, + "step": 355 + }, + { + "epoch": 0.46, + "learning_rate": 3.6828644501278776e-05, + "loss": 1.3198, + "step": 360 + }, + { + "epoch": 0.47, + "learning_rate": 3.7340153452685426e-05, + "loss": 1.4826, + "step": 365 + }, + { + "epoch": 0.47, + "learning_rate": 3.7851662404092075e-05, + "loss": 1.1348, + "step": 370 + }, + { + "epoch": 0.48, + "learning_rate": 3.8363171355498725e-05, + "loss": 1.2849, + "step": 375 + }, + { + "epoch": 0.49, + "learning_rate": 3.8874680306905374e-05, + "loss": 1.2261, + "step": 380 + }, + { + "epoch": 0.49, + "learning_rate": 3.9386189258312024e-05, + "loss": 1.175, + "step": 385 + }, + { + "epoch": 0.5, + "learning_rate": 3.989769820971867e-05, + "loss": 1.1654, + "step": 390 + }, + { + "epoch": 0.51, + "learning_rate": 3.9954467842914065e-05, + "loss": 1.2218, + "step": 395 + }, + { + "epoch": 0.51, + "learning_rate": 3.989755264655663e-05, + "loss": 1.0849, + "step": 400 + }, + { + "epoch": 0.52, + "learning_rate": 3.9840637450199205e-05, + "loss": 1.1411, + "step": 405 + }, + { + "epoch": 0.52, + "learning_rate": 3.978372225384178e-05, + "loss": 1.168, + "step": 410 + }, + { + "epoch": 0.53, + "learning_rate": 3.972680705748435e-05, + "loss": 1.2054, + "step": 415 + }, + { + "epoch": 0.54, + "learning_rate": 3.9669891861126925e-05, + "loss": 1.1557, + "step": 420 + }, + { + "epoch": 0.54, + "learning_rate": 3.96129766647695e-05, + "loss": 1.119, + "step": 425 + }, + { + "epoch": 0.55, + "learning_rate": 3.955606146841207e-05, + "loss": 1.1867, + "step": 430 + }, + { + "epoch": 0.56, + "learning_rate": 3.949914627205464e-05, + "loss": 0.9051, + "step": 435 + }, + { + "epoch": 0.56, + "learning_rate": 3.944223107569721e-05, + "loss": 1.0337, + "step": 440 + }, + { + "epoch": 0.57, + "learning_rate": 3.9385315879339785e-05, + "loss": 0.9839, + "step": 445 + }, + { + "epoch": 0.58, + "learning_rate": 3.932840068298236e-05, + "loss": 0.866, + "step": 450 + }, + { + "epoch": 0.58, + "learning_rate": 3.927148548662493e-05, + "loss": 0.9547, + "step": 455 + }, + { + "epoch": 0.59, + "learning_rate": 3.9214570290267505e-05, + "loss": 0.9838, + "step": 460 + }, + { + "epoch": 0.6, + "learning_rate": 3.915765509391008e-05, + "loss": 1.1078, + "step": 465 + }, + { + "epoch": 0.6, + "learning_rate": 3.910073989755265e-05, + "loss": 0.932, + "step": 470 + }, + { + "epoch": 0.61, + "learning_rate": 3.9043824701195225e-05, + "loss": 0.9879, + "step": 475 + }, + { + "epoch": 0.61, + "learning_rate": 3.898690950483779e-05, + "loss": 1.1054, + "step": 480 + }, + { + "epoch": 0.62, + "learning_rate": 3.8929994308480365e-05, + "loss": 0.9784, + "step": 485 + }, + { + "epoch": 0.63, + "learning_rate": 3.887307911212294e-05, + "loss": 1.0294, + "step": 490 + }, + { + "epoch": 0.63, + "learning_rate": 3.881616391576551e-05, + "loss": 0.946, + "step": 495 + }, + { + "epoch": 0.64, + "learning_rate": 3.8759248719408085e-05, + "loss": 0.9403, + "step": 500 + }, + { + "epoch": 0.65, + "learning_rate": 3.870233352305066e-05, + "loss": 0.8587, + "step": 505 + }, + { + "epoch": 0.65, + "learning_rate": 3.864541832669323e-05, + "loss": 0.881, + "step": 510 + }, + { + "epoch": 0.66, + "learning_rate": 3.85885031303358e-05, + "loss": 0.904, + "step": 515 + }, + { + "epoch": 0.67, + "learning_rate": 3.853158793397838e-05, + "loss": 0.9147, + "step": 520 + }, + { + "epoch": 0.67, + "learning_rate": 3.8474672737620945e-05, + "loss": 0.9299, + "step": 525 + }, + { + "epoch": 0.68, + "learning_rate": 3.841775754126352e-05, + "loss": 0.9064, + "step": 530 + }, + { + "epoch": 0.68, + "learning_rate": 3.836084234490609e-05, + "loss": 1.0158, + "step": 535 + }, + { + "epoch": 0.69, + "learning_rate": 3.8303927148548666e-05, + "loss": 0.8712, + "step": 540 + }, + { + "epoch": 0.7, + "learning_rate": 3.824701195219124e-05, + "loss": 0.8876, + "step": 545 + }, + { + "epoch": 0.7, + "learning_rate": 3.819009675583381e-05, + "loss": 0.9165, + "step": 550 + }, + { + "epoch": 0.71, + "learning_rate": 3.8133181559476386e-05, + "loss": 0.7727, + "step": 555 + }, + { + "epoch": 0.72, + "learning_rate": 3.807626636311895e-05, + "loss": 0.9544, + "step": 560 + }, + { + "epoch": 0.72, + "learning_rate": 3.8019351166761526e-05, + "loss": 0.8282, + "step": 565 + }, + { + "epoch": 0.73, + "learning_rate": 3.79624359704041e-05, + "loss": 0.8105, + "step": 570 + }, + { + "epoch": 0.74, + "learning_rate": 3.790552077404667e-05, + "loss": 0.8246, + "step": 575 + }, + { + "epoch": 0.74, + "learning_rate": 3.7848605577689246e-05, + "loss": 0.8071, + "step": 580 + }, + { + "epoch": 0.75, + "learning_rate": 3.779169038133182e-05, + "loss": 0.964, + "step": 585 + }, + { + "epoch": 0.76, + "learning_rate": 3.773477518497439e-05, + "loss": 0.8634, + "step": 590 + }, + { + "epoch": 0.76, + "learning_rate": 3.7677859988616966e-05, + "loss": 0.7772, + "step": 595 + }, + { + "epoch": 0.77, + "learning_rate": 3.762094479225954e-05, + "loss": 0.8086, + "step": 600 + }, + { + "epoch": 0.77, + "learning_rate": 3.7564029595902106e-05, + "loss": 0.8886, + "step": 605 + }, + { + "epoch": 0.78, + "learning_rate": 3.750711439954468e-05, + "loss": 0.8618, + "step": 610 + }, + { + "epoch": 0.79, + "learning_rate": 3.745019920318725e-05, + "loss": 0.8221, + "step": 615 + }, + { + "epoch": 0.79, + "learning_rate": 3.7393284006829826e-05, + "loss": 0.9336, + "step": 620 + }, + { + "epoch": 0.8, + "learning_rate": 3.73363688104724e-05, + "loss": 0.7384, + "step": 625 + }, + { + "epoch": 0.81, + "learning_rate": 3.727945361411497e-05, + "loss": 0.7313, + "step": 630 + }, + { + "epoch": 0.81, + "learning_rate": 3.7222538417757546e-05, + "loss": 0.9178, + "step": 635 + }, + { + "epoch": 0.82, + "learning_rate": 3.716562322140011e-05, + "loss": 0.8866, + "step": 640 + }, + { + "epoch": 0.83, + "learning_rate": 3.710870802504269e-05, + "loss": 0.6832, + "step": 645 + }, + { + "epoch": 0.83, + "learning_rate": 3.705179282868526e-05, + "loss": 0.776, + "step": 650 + }, + { + "epoch": 0.84, + "learning_rate": 3.699487763232783e-05, + "loss": 0.7298, + "step": 655 + }, + { + "epoch": 0.84, + "learning_rate": 3.6937962435970406e-05, + "loss": 0.7268, + "step": 660 + }, + { + "epoch": 0.85, + "learning_rate": 3.688104723961298e-05, + "loss": 0.8073, + "step": 665 + }, + { + "epoch": 0.86, + "learning_rate": 3.682413204325555e-05, + "loss": 0.7678, + "step": 670 + }, + { + "epoch": 0.86, + "learning_rate": 3.6767216846898126e-05, + "loss": 0.8216, + "step": 675 + }, + { + "epoch": 0.87, + "learning_rate": 3.67103016505407e-05, + "loss": 0.6896, + "step": 680 + }, + { + "epoch": 0.88, + "learning_rate": 3.6653386454183266e-05, + "loss": 0.8691, + "step": 685 + }, + { + "epoch": 0.88, + "learning_rate": 3.659647125782584e-05, + "loss": 0.8097, + "step": 690 + }, + { + "epoch": 0.89, + "learning_rate": 3.653955606146841e-05, + "loss": 0.7124, + "step": 695 + }, + { + "epoch": 0.9, + "learning_rate": 3.6482640865110987e-05, + "loss": 0.8661, + "step": 700 + }, + { + "epoch": 0.9, + "learning_rate": 3.642572566875356e-05, + "loss": 1.0455, + "step": 705 + }, + { + "epoch": 0.91, + "learning_rate": 3.636881047239613e-05, + "loss": 0.8263, + "step": 710 + }, + { + "epoch": 0.92, + "learning_rate": 3.631189527603871e-05, + "loss": 0.6256, + "step": 715 + }, + { + "epoch": 0.92, + "learning_rate": 3.625498007968128e-05, + "loss": 0.788, + "step": 720 + }, + { + "epoch": 0.93, + "learning_rate": 3.6198064883323853e-05, + "loss": 0.7374, + "step": 725 + }, + { + "epoch": 0.93, + "learning_rate": 3.614114968696642e-05, + "loss": 0.6936, + "step": 730 + }, + { + "epoch": 0.94, + "learning_rate": 3.6084234490608993e-05, + "loss": 0.7579, + "step": 735 + }, + { + "epoch": 0.95, + "learning_rate": 3.602731929425157e-05, + "loss": 0.7191, + "step": 740 + }, + { + "epoch": 0.95, + "learning_rate": 3.597040409789414e-05, + "loss": 0.7349, + "step": 745 + }, + { + "epoch": 0.96, + "learning_rate": 3.5913488901536714e-05, + "loss": 0.6269, + "step": 750 + }, + { + "epoch": 0.97, + "learning_rate": 3.585657370517929e-05, + "loss": 0.664, + "step": 755 + }, + { + "epoch": 0.97, + "learning_rate": 3.579965850882186e-05, + "loss": 0.6365, + "step": 760 + }, + { + "epoch": 0.98, + "learning_rate": 3.574274331246443e-05, + "loss": 0.756, + "step": 765 + }, + { + "epoch": 0.99, + "learning_rate": 3.568582811610701e-05, + "loss": 0.7975, + "step": 770 + }, + { + "epoch": 0.99, + "learning_rate": 3.5628912919749574e-05, + "loss": 0.8584, + "step": 775 + }, + { + "epoch": 1.0, + "learning_rate": 3.557199772339215e-05, + "loss": 0.6439, + "step": 780 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.8138, + "eval_loss": 0.6126329302787781, + "eval_runtime": 60.9802, + "eval_samples_per_second": 163.988, + "eval_steps_per_second": 10.249, + "step": 781 + }, + { + "epoch": 1.01, + "learning_rate": 3.551508252703472e-05, + "loss": 0.6383, + "step": 785 + }, + { + "epoch": 1.01, + "learning_rate": 3.5458167330677294e-05, + "loss": 0.6756, + "step": 790 + }, + { + "epoch": 1.02, + "learning_rate": 3.540125213431987e-05, + "loss": 0.5847, + "step": 795 + }, + { + "epoch": 1.02, + "learning_rate": 3.534433693796244e-05, + "loss": 0.5047, + "step": 800 + }, + { + "epoch": 1.03, + "learning_rate": 3.5287421741605014e-05, + "loss": 0.5946, + "step": 805 + }, + { + "epoch": 1.04, + "learning_rate": 3.523050654524758e-05, + "loss": 0.51, + "step": 810 + }, + { + "epoch": 1.04, + "learning_rate": 3.5173591348890154e-05, + "loss": 0.5915, + "step": 815 + }, + { + "epoch": 1.05, + "learning_rate": 3.511667615253273e-05, + "loss": 0.6674, + "step": 820 + }, + { + "epoch": 1.06, + "learning_rate": 3.50597609561753e-05, + "loss": 0.6427, + "step": 825 + }, + { + "epoch": 1.06, + "learning_rate": 3.5002845759817874e-05, + "loss": 0.6404, + "step": 830 + }, + { + "epoch": 1.07, + "learning_rate": 3.494593056346045e-05, + "loss": 0.5568, + "step": 835 + }, + { + "epoch": 1.08, + "learning_rate": 3.488901536710302e-05, + "loss": 0.7048, + "step": 840 + }, + { + "epoch": 1.08, + "learning_rate": 3.4832100170745594e-05, + "loss": 0.5817, + "step": 845 + }, + { + "epoch": 1.09, + "learning_rate": 3.477518497438817e-05, + "loss": 0.5192, + "step": 850 + }, + { + "epoch": 1.09, + "learning_rate": 3.4718269778030734e-05, + "loss": 0.7096, + "step": 855 + }, + { + "epoch": 1.1, + "learning_rate": 3.466135458167331e-05, + "loss": 0.561, + "step": 860 + }, + { + "epoch": 1.11, + "learning_rate": 3.460443938531588e-05, + "loss": 0.6275, + "step": 865 + }, + { + "epoch": 1.11, + "learning_rate": 3.4547524188958454e-05, + "loss": 0.5082, + "step": 870 + }, + { + "epoch": 1.12, + "learning_rate": 3.449060899260103e-05, + "loss": 0.616, + "step": 875 + }, + { + "epoch": 1.13, + "learning_rate": 3.44336937962436e-05, + "loss": 0.5976, + "step": 880 + }, + { + "epoch": 1.13, + "learning_rate": 3.4376778599886174e-05, + "loss": 0.6847, + "step": 885 + }, + { + "epoch": 1.14, + "learning_rate": 3.431986340352874e-05, + "loss": 0.4798, + "step": 890 + }, + { + "epoch": 1.15, + "learning_rate": 3.426294820717132e-05, + "loss": 0.6393, + "step": 895 + }, + { + "epoch": 1.15, + "learning_rate": 3.420603301081389e-05, + "loss": 0.4907, + "step": 900 + }, + { + "epoch": 1.16, + "learning_rate": 3.414911781445646e-05, + "loss": 0.4741, + "step": 905 + }, + { + "epoch": 1.17, + "learning_rate": 3.4092202618099035e-05, + "loss": 0.4989, + "step": 910 + }, + { + "epoch": 1.17, + "learning_rate": 3.403528742174161e-05, + "loss": 0.6102, + "step": 915 + }, + { + "epoch": 1.18, + "learning_rate": 3.397837222538418e-05, + "loss": 0.6051, + "step": 920 + }, + { + "epoch": 1.18, + "learning_rate": 3.3921457029026755e-05, + "loss": 0.5615, + "step": 925 + }, + { + "epoch": 1.19, + "learning_rate": 3.386454183266933e-05, + "loss": 0.5091, + "step": 930 + }, + { + "epoch": 1.2, + "learning_rate": 3.3807626636311895e-05, + "loss": 0.5863, + "step": 935 + }, + { + "epoch": 1.2, + "learning_rate": 3.375071143995447e-05, + "loss": 0.6056, + "step": 940 + }, + { + "epoch": 1.21, + "learning_rate": 3.369379624359704e-05, + "loss": 0.4893, + "step": 945 + }, + { + "epoch": 1.22, + "learning_rate": 3.3636881047239615e-05, + "loss": 0.607, + "step": 950 + }, + { + "epoch": 1.22, + "learning_rate": 3.357996585088219e-05, + "loss": 0.5942, + "step": 955 + }, + { + "epoch": 1.23, + "learning_rate": 3.352305065452476e-05, + "loss": 0.5453, + "step": 960 + }, + { + "epoch": 1.24, + "learning_rate": 3.3466135458167335e-05, + "loss": 0.5637, + "step": 965 + }, + { + "epoch": 1.24, + "learning_rate": 3.34092202618099e-05, + "loss": 0.5974, + "step": 970 + }, + { + "epoch": 1.25, + "learning_rate": 3.335230506545248e-05, + "loss": 0.5365, + "step": 975 + }, + { + "epoch": 1.25, + "learning_rate": 3.329538986909505e-05, + "loss": 0.5487, + "step": 980 + }, + { + "epoch": 1.26, + "learning_rate": 3.323847467273762e-05, + "loss": 0.5981, + "step": 985 + }, + { + "epoch": 1.27, + "learning_rate": 3.3181559476380195e-05, + "loss": 0.4977, + "step": 990 + }, + { + "epoch": 1.27, + "learning_rate": 3.312464428002277e-05, + "loss": 0.4873, + "step": 995 + }, + { + "epoch": 1.28, + "learning_rate": 3.306772908366534e-05, + "loss": 0.6305, + "step": 1000 + }, + { + "epoch": 1.29, + "learning_rate": 3.3010813887307915e-05, + "loss": 0.4625, + "step": 1005 + }, + { + "epoch": 1.29, + "learning_rate": 3.295389869095049e-05, + "loss": 0.7791, + "step": 1010 + }, + { + "epoch": 1.3, + "learning_rate": 3.2896983494593055e-05, + "loss": 0.5784, + "step": 1015 + }, + { + "epoch": 1.31, + "learning_rate": 3.2840068298235635e-05, + "loss": 0.4482, + "step": 1020 + }, + { + "epoch": 1.31, + "learning_rate": 3.27831531018782e-05, + "loss": 0.5718, + "step": 1025 + }, + { + "epoch": 1.32, + "learning_rate": 3.2726237905520775e-05, + "loss": 0.5399, + "step": 1030 + }, + { + "epoch": 1.33, + "learning_rate": 3.266932270916335e-05, + "loss": 0.5408, + "step": 1035 + }, + { + "epoch": 1.33, + "learning_rate": 3.261240751280592e-05, + "loss": 0.5713, + "step": 1040 + }, + { + "epoch": 1.34, + "learning_rate": 3.2555492316448495e-05, + "loss": 0.3968, + "step": 1045 + }, + { + "epoch": 1.34, + "learning_rate": 3.249857712009107e-05, + "loss": 0.5708, + "step": 1050 + }, + { + "epoch": 1.35, + "learning_rate": 3.244166192373364e-05, + "loss": 0.6139, + "step": 1055 + }, + { + "epoch": 1.36, + "learning_rate": 3.238474672737621e-05, + "loss": 0.6031, + "step": 1060 + }, + { + "epoch": 1.36, + "learning_rate": 3.232783153101878e-05, + "loss": 0.4819, + "step": 1065 + }, + { + "epoch": 1.37, + "learning_rate": 3.2270916334661356e-05, + "loss": 0.5141, + "step": 1070 + }, + { + "epoch": 1.38, + "learning_rate": 3.221400113830393e-05, + "loss": 0.4998, + "step": 1075 + }, + { + "epoch": 1.38, + "learning_rate": 3.21570859419465e-05, + "loss": 0.4646, + "step": 1080 + }, + { + "epoch": 1.39, + "learning_rate": 3.2100170745589076e-05, + "loss": 0.4859, + "step": 1085 + }, + { + "epoch": 1.4, + "learning_rate": 3.204325554923165e-05, + "loss": 0.5069, + "step": 1090 + }, + { + "epoch": 1.4, + "learning_rate": 3.1986340352874216e-05, + "loss": 0.5751, + "step": 1095 + }, + { + "epoch": 1.41, + "learning_rate": 3.1929425156516796e-05, + "loss": 0.4505, + "step": 1100 + }, + { + "epoch": 1.41, + "learning_rate": 3.187250996015936e-05, + "loss": 0.5396, + "step": 1105 + }, + { + "epoch": 1.42, + "learning_rate": 3.1815594763801936e-05, + "loss": 0.5394, + "step": 1110 + }, + { + "epoch": 1.43, + "learning_rate": 3.175867956744451e-05, + "loss": 0.6824, + "step": 1115 + }, + { + "epoch": 1.43, + "learning_rate": 3.170176437108708e-05, + "loss": 0.414, + "step": 1120 + }, + { + "epoch": 1.44, + "learning_rate": 3.1644849174729656e-05, + "loss": 0.5944, + "step": 1125 + }, + { + "epoch": 1.45, + "learning_rate": 3.158793397837223e-05, + "loss": 0.5384, + "step": 1130 + }, + { + "epoch": 1.45, + "learning_rate": 3.15310187820148e-05, + "loss": 0.7521, + "step": 1135 + }, + { + "epoch": 1.46, + "learning_rate": 3.147410358565737e-05, + "loss": 0.6244, + "step": 1140 + }, + { + "epoch": 1.47, + "learning_rate": 3.141718838929995e-05, + "loss": 0.4822, + "step": 1145 + }, + { + "epoch": 1.47, + "learning_rate": 3.1360273192942516e-05, + "loss": 0.5942, + "step": 1150 + }, + { + "epoch": 1.48, + "learning_rate": 3.130335799658509e-05, + "loss": 0.5526, + "step": 1155 + }, + { + "epoch": 1.49, + "learning_rate": 3.124644280022766e-05, + "loss": 0.5807, + "step": 1160 + }, + { + "epoch": 1.49, + "learning_rate": 3.1189527603870236e-05, + "loss": 0.6191, + "step": 1165 + }, + { + "epoch": 1.5, + "learning_rate": 3.113261240751281e-05, + "loss": 0.4252, + "step": 1170 + }, + { + "epoch": 1.5, + "learning_rate": 3.107569721115538e-05, + "loss": 0.6039, + "step": 1175 + }, + { + "epoch": 1.51, + "learning_rate": 3.1018782014797956e-05, + "loss": 0.5023, + "step": 1180 + }, + { + "epoch": 1.52, + "learning_rate": 3.096186681844052e-05, + "loss": 0.4397, + "step": 1185 + }, + { + "epoch": 1.52, + "learning_rate": 3.0904951622083096e-05, + "loss": 0.5488, + "step": 1190 + }, + { + "epoch": 1.53, + "learning_rate": 3.084803642572567e-05, + "loss": 0.4943, + "step": 1195 + }, + { + "epoch": 1.54, + "learning_rate": 3.079112122936824e-05, + "loss": 0.4196, + "step": 1200 + }, + { + "epoch": 1.54, + "learning_rate": 3.0734206033010816e-05, + "loss": 0.5103, + "step": 1205 + }, + { + "epoch": 1.55, + "learning_rate": 3.067729083665339e-05, + "loss": 0.5383, + "step": 1210 + }, + { + "epoch": 1.56, + "learning_rate": 3.062037564029596e-05, + "loss": 0.5533, + "step": 1215 + }, + { + "epoch": 1.56, + "learning_rate": 3.056346044393853e-05, + "loss": 0.6003, + "step": 1220 + }, + { + "epoch": 1.57, + "learning_rate": 3.0506545247581107e-05, + "loss": 0.3887, + "step": 1225 + }, + { + "epoch": 1.57, + "learning_rate": 3.0449630051223676e-05, + "loss": 0.4925, + "step": 1230 + }, + { + "epoch": 1.58, + "learning_rate": 3.0392714854866253e-05, + "loss": 0.5327, + "step": 1235 + }, + { + "epoch": 1.59, + "learning_rate": 3.0335799658508823e-05, + "loss": 0.4195, + "step": 1240 + }, + { + "epoch": 1.59, + "learning_rate": 3.0278884462151397e-05, + "loss": 0.4912, + "step": 1245 + }, + { + "epoch": 1.6, + "learning_rate": 3.022196926579397e-05, + "loss": 0.6002, + "step": 1250 + }, + { + "epoch": 1.61, + "learning_rate": 3.016505406943654e-05, + "loss": 0.5191, + "step": 1255 + }, + { + "epoch": 1.61, + "learning_rate": 3.0108138873079117e-05, + "loss": 0.4732, + "step": 1260 + }, + { + "epoch": 1.62, + "learning_rate": 3.0051223676721687e-05, + "loss": 0.4728, + "step": 1265 + }, + { + "epoch": 1.63, + "learning_rate": 2.999430848036426e-05, + "loss": 0.658, + "step": 1270 + }, + { + "epoch": 1.63, + "learning_rate": 2.993739328400683e-05, + "loss": 0.3973, + "step": 1275 + }, + { + "epoch": 1.64, + "learning_rate": 2.9880478087649403e-05, + "loss": 0.518, + "step": 1280 + }, + { + "epoch": 1.65, + "learning_rate": 2.982356289129198e-05, + "loss": 0.513, + "step": 1285 + }, + { + "epoch": 1.65, + "learning_rate": 2.976664769493455e-05, + "loss": 0.4699, + "step": 1290 + }, + { + "epoch": 1.66, + "learning_rate": 2.9709732498577124e-05, + "loss": 0.5086, + "step": 1295 + }, + { + "epoch": 1.66, + "learning_rate": 2.9652817302219694e-05, + "loss": 0.4464, + "step": 1300 + }, + { + "epoch": 1.67, + "learning_rate": 2.9595902105862267e-05, + "loss": 0.4587, + "step": 1305 + }, + { + "epoch": 1.68, + "learning_rate": 2.953898690950484e-05, + "loss": 0.5568, + "step": 1310 + }, + { + "epoch": 1.68, + "learning_rate": 2.9482071713147414e-05, + "loss": 0.4991, + "step": 1315 + }, + { + "epoch": 1.69, + "learning_rate": 2.9425156516789984e-05, + "loss": 0.4953, + "step": 1320 + }, + { + "epoch": 1.7, + "learning_rate": 2.9368241320432557e-05, + "loss": 0.5821, + "step": 1325 + }, + { + "epoch": 1.7, + "learning_rate": 2.9311326124075134e-05, + "loss": 0.4582, + "step": 1330 + }, + { + "epoch": 1.71, + "learning_rate": 2.9254410927717704e-05, + "loss": 0.4931, + "step": 1335 + }, + { + "epoch": 1.72, + "learning_rate": 2.9197495731360277e-05, + "loss": 0.4979, + "step": 1340 + }, + { + "epoch": 1.72, + "learning_rate": 2.9140580535002847e-05, + "loss": 0.4933, + "step": 1345 + }, + { + "epoch": 1.73, + "learning_rate": 2.908366533864542e-05, + "loss": 0.463, + "step": 1350 + }, + { + "epoch": 1.73, + "learning_rate": 2.902675014228799e-05, + "loss": 0.4945, + "step": 1355 + }, + { + "epoch": 1.74, + "learning_rate": 2.8969834945930567e-05, + "loss": 0.4822, + "step": 1360 + }, + { + "epoch": 1.75, + "learning_rate": 2.8912919749573137e-05, + "loss": 0.5452, + "step": 1365 + }, + { + "epoch": 1.75, + "learning_rate": 2.885600455321571e-05, + "loss": 0.4868, + "step": 1370 + }, + { + "epoch": 1.76, + "learning_rate": 2.8799089356858284e-05, + "loss": 0.553, + "step": 1375 + }, + { + "epoch": 1.77, + "learning_rate": 2.8742174160500854e-05, + "loss": 0.5744, + "step": 1380 + }, + { + "epoch": 1.77, + "learning_rate": 2.868525896414343e-05, + "loss": 0.5091, + "step": 1385 + }, + { + "epoch": 1.78, + "learning_rate": 2.8628343767786e-05, + "loss": 0.5209, + "step": 1390 + }, + { + "epoch": 1.79, + "learning_rate": 2.8571428571428574e-05, + "loss": 0.5506, + "step": 1395 + }, + { + "epoch": 1.79, + "learning_rate": 2.8514513375071144e-05, + "loss": 0.5383, + "step": 1400 + }, + { + "epoch": 1.8, + "learning_rate": 2.8457598178713718e-05, + "loss": 0.5534, + "step": 1405 + }, + { + "epoch": 1.81, + "learning_rate": 2.8400682982356294e-05, + "loss": 0.3911, + "step": 1410 + }, + { + "epoch": 1.81, + "learning_rate": 2.8343767785998864e-05, + "loss": 0.501, + "step": 1415 + }, + { + "epoch": 1.82, + "learning_rate": 2.8286852589641438e-05, + "loss": 0.4988, + "step": 1420 + }, + { + "epoch": 1.82, + "learning_rate": 2.8229937393284008e-05, + "loss": 0.5158, + "step": 1425 + }, + { + "epoch": 1.83, + "learning_rate": 2.817302219692658e-05, + "loss": 0.4976, + "step": 1430 + }, + { + "epoch": 1.84, + "learning_rate": 2.811610700056915e-05, + "loss": 0.4873, + "step": 1435 + }, + { + "epoch": 1.84, + "learning_rate": 2.8059191804211728e-05, + "loss": 0.5198, + "step": 1440 + }, + { + "epoch": 1.85, + "learning_rate": 2.8002276607854298e-05, + "loss": 0.4795, + "step": 1445 + }, + { + "epoch": 1.86, + "learning_rate": 2.794536141149687e-05, + "loss": 0.5029, + "step": 1450 + }, + { + "epoch": 1.86, + "learning_rate": 2.7888446215139448e-05, + "loss": 0.4574, + "step": 1455 + }, + { + "epoch": 1.87, + "learning_rate": 2.7831531018782018e-05, + "loss": 0.4224, + "step": 1460 + }, + { + "epoch": 1.88, + "learning_rate": 2.777461582242459e-05, + "loss": 0.4447, + "step": 1465 + }, + { + "epoch": 1.88, + "learning_rate": 2.771770062606716e-05, + "loss": 0.5863, + "step": 1470 + }, + { + "epoch": 1.89, + "learning_rate": 2.7660785429709735e-05, + "loss": 0.5724, + "step": 1475 + }, + { + "epoch": 1.89, + "learning_rate": 2.7603870233352305e-05, + "loss": 0.4397, + "step": 1480 + }, + { + "epoch": 1.9, + "learning_rate": 2.754695503699488e-05, + "loss": 0.441, + "step": 1485 + }, + { + "epoch": 1.91, + "learning_rate": 2.749003984063745e-05, + "loss": 0.549, + "step": 1490 + }, + { + "epoch": 1.91, + "learning_rate": 2.7433124644280025e-05, + "loss": 0.4723, + "step": 1495 + }, + { + "epoch": 1.92, + "learning_rate": 2.7376209447922598e-05, + "loss": 0.4554, + "step": 1500 + }, + { + "epoch": 1.93, + "learning_rate": 2.7319294251565168e-05, + "loss": 0.5067, + "step": 1505 + }, + { + "epoch": 1.93, + "learning_rate": 2.7262379055207745e-05, + "loss": 0.3471, + "step": 1510 + }, + { + "epoch": 1.94, + "learning_rate": 2.7205463858850315e-05, + "loss": 0.4403, + "step": 1515 + }, + { + "epoch": 1.95, + "learning_rate": 2.714854866249289e-05, + "loss": 0.4034, + "step": 1520 + }, + { + "epoch": 1.95, + "learning_rate": 2.709163346613546e-05, + "loss": 0.617, + "step": 1525 + }, + { + "epoch": 1.96, + "learning_rate": 2.7034718269778032e-05, + "loss": 0.489, + "step": 1530 + }, + { + "epoch": 1.97, + "learning_rate": 2.697780307342061e-05, + "loss": 0.4514, + "step": 1535 + }, + { + "epoch": 1.97, + "learning_rate": 2.692088787706318e-05, + "loss": 0.4604, + "step": 1540 + }, + { + "epoch": 1.98, + "learning_rate": 2.6863972680705752e-05, + "loss": 0.4845, + "step": 1545 + }, + { + "epoch": 1.98, + "learning_rate": 2.6807057484348322e-05, + "loss": 0.4273, + "step": 1550 + }, + { + "epoch": 1.99, + "learning_rate": 2.6750142287990895e-05, + "loss": 0.3995, + "step": 1555 + }, + { + "epoch": 2.0, + "learning_rate": 2.6693227091633465e-05, + "loss": 0.6222, + "step": 1560 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.8393, + "eval_loss": 0.5094287395477295, + "eval_runtime": 60.7156, + "eval_samples_per_second": 164.702, + "eval_steps_per_second": 10.294, + "step": 1562 + }, + { + "epoch": 2.0, + "learning_rate": 2.6636311895276042e-05, + "loss": 0.3977, + "step": 1565 + }, + { + "epoch": 2.01, + "learning_rate": 2.6579396698918612e-05, + "loss": 0.2847, + "step": 1570 + }, + { + "epoch": 2.02, + "learning_rate": 2.6522481502561185e-05, + "loss": 0.384, + "step": 1575 + }, + { + "epoch": 2.02, + "learning_rate": 2.6465566306203762e-05, + "loss": 0.3344, + "step": 1580 + }, + { + "epoch": 2.03, + "learning_rate": 2.6408651109846332e-05, + "loss": 0.347, + "step": 1585 + }, + { + "epoch": 2.04, + "learning_rate": 2.6351735913488905e-05, + "loss": 0.3207, + "step": 1590 + }, + { + "epoch": 2.04, + "learning_rate": 2.6294820717131475e-05, + "loss": 0.3625, + "step": 1595 + }, + { + "epoch": 2.05, + "learning_rate": 2.623790552077405e-05, + "loss": 0.2822, + "step": 1600 + }, + { + "epoch": 2.06, + "learning_rate": 2.618099032441662e-05, + "loss": 0.3479, + "step": 1605 + }, + { + "epoch": 2.06, + "learning_rate": 2.6124075128059196e-05, + "loss": 0.318, + "step": 1610 + }, + { + "epoch": 2.07, + "learning_rate": 2.6067159931701766e-05, + "loss": 0.3668, + "step": 1615 + }, + { + "epoch": 2.07, + "learning_rate": 2.601024473534434e-05, + "loss": 0.3594, + "step": 1620 + }, + { + "epoch": 2.08, + "learning_rate": 2.5953329538986912e-05, + "loss": 0.3636, + "step": 1625 + }, + { + "epoch": 2.09, + "learning_rate": 2.5896414342629482e-05, + "loss": 0.3588, + "step": 1630 + }, + { + "epoch": 2.09, + "learning_rate": 2.583949914627206e-05, + "loss": 0.3155, + "step": 1635 + }, + { + "epoch": 2.1, + "learning_rate": 2.578258394991463e-05, + "loss": 0.3362, + "step": 1640 + }, + { + "epoch": 2.11, + "learning_rate": 2.5725668753557202e-05, + "loss": 0.3159, + "step": 1645 + }, + { + "epoch": 2.11, + "learning_rate": 2.5668753557199772e-05, + "loss": 0.3167, + "step": 1650 + }, + { + "epoch": 2.12, + "learning_rate": 2.5611838360842346e-05, + "loss": 0.3597, + "step": 1655 + }, + { + "epoch": 2.13, + "learning_rate": 2.5554923164484923e-05, + "loss": 0.2862, + "step": 1660 + }, + { + "epoch": 2.13, + "learning_rate": 2.5498007968127493e-05, + "loss": 0.4218, + "step": 1665 + }, + { + "epoch": 2.14, + "learning_rate": 2.5441092771770066e-05, + "loss": 0.3902, + "step": 1670 + }, + { + "epoch": 2.14, + "learning_rate": 2.5384177575412636e-05, + "loss": 0.371, + "step": 1675 + }, + { + "epoch": 2.15, + "learning_rate": 2.532726237905521e-05, + "loss": 0.3218, + "step": 1680 + }, + { + "epoch": 2.16, + "learning_rate": 2.527034718269778e-05, + "loss": 0.3233, + "step": 1685 + }, + { + "epoch": 2.16, + "learning_rate": 2.5213431986340356e-05, + "loss": 0.3293, + "step": 1690 + }, + { + "epoch": 2.17, + "learning_rate": 2.5156516789982926e-05, + "loss": 0.295, + "step": 1695 + }, + { + "epoch": 2.18, + "learning_rate": 2.50996015936255e-05, + "loss": 0.3192, + "step": 1700 + }, + { + "epoch": 2.18, + "learning_rate": 2.5042686397268073e-05, + "loss": 0.2638, + "step": 1705 + }, + { + "epoch": 2.19, + "learning_rate": 2.4985771200910646e-05, + "loss": 0.3065, + "step": 1710 + }, + { + "epoch": 2.2, + "learning_rate": 2.492885600455322e-05, + "loss": 0.3483, + "step": 1715 + }, + { + "epoch": 2.2, + "learning_rate": 2.487194080819579e-05, + "loss": 0.3138, + "step": 1720 + }, + { + "epoch": 2.21, + "learning_rate": 2.4815025611838363e-05, + "loss": 0.3677, + "step": 1725 + }, + { + "epoch": 2.22, + "learning_rate": 2.4758110415480933e-05, + "loss": 0.3726, + "step": 1730 + }, + { + "epoch": 2.22, + "learning_rate": 2.470119521912351e-05, + "loss": 0.3356, + "step": 1735 + }, + { + "epoch": 2.23, + "learning_rate": 2.4644280022766083e-05, + "loss": 0.3099, + "step": 1740 + }, + { + "epoch": 2.23, + "learning_rate": 2.4587364826408653e-05, + "loss": 0.283, + "step": 1745 + }, + { + "epoch": 2.24, + "learning_rate": 2.4530449630051226e-05, + "loss": 0.2828, + "step": 1750 + }, + { + "epoch": 2.25, + "learning_rate": 2.4473534433693796e-05, + "loss": 0.3751, + "step": 1755 + }, + { + "epoch": 2.25, + "learning_rate": 2.4416619237336373e-05, + "loss": 0.3227, + "step": 1760 + }, + { + "epoch": 2.26, + "learning_rate": 2.4359704040978943e-05, + "loss": 0.3716, + "step": 1765 + }, + { + "epoch": 2.27, + "learning_rate": 2.4302788844621517e-05, + "loss": 0.3669, + "step": 1770 + }, + { + "epoch": 2.27, + "learning_rate": 2.4245873648264087e-05, + "loss": 0.3195, + "step": 1775 + }, + { + "epoch": 2.28, + "learning_rate": 2.418895845190666e-05, + "loss": 0.3147, + "step": 1780 + }, + { + "epoch": 2.29, + "learning_rate": 2.4132043255549237e-05, + "loss": 0.339, + "step": 1785 + }, + { + "epoch": 2.29, + "learning_rate": 2.4075128059191807e-05, + "loss": 0.3949, + "step": 1790 + }, + { + "epoch": 2.3, + "learning_rate": 2.401821286283438e-05, + "loss": 0.2976, + "step": 1795 + }, + { + "epoch": 2.3, + "learning_rate": 2.396129766647695e-05, + "loss": 0.4075, + "step": 1800 + }, + { + "epoch": 2.31, + "learning_rate": 2.3904382470119523e-05, + "loss": 0.3482, + "step": 1805 + }, + { + "epoch": 2.32, + "learning_rate": 2.3847467273762093e-05, + "loss": 0.4089, + "step": 1810 + }, + { + "epoch": 2.32, + "learning_rate": 2.379055207740467e-05, + "loss": 0.3574, + "step": 1815 + }, + { + "epoch": 2.33, + "learning_rate": 2.373363688104724e-05, + "loss": 0.3617, + "step": 1820 + }, + { + "epoch": 2.34, + "learning_rate": 2.3676721684689814e-05, + "loss": 0.3421, + "step": 1825 + }, + { + "epoch": 2.34, + "learning_rate": 2.3619806488332387e-05, + "loss": 0.3523, + "step": 1830 + }, + { + "epoch": 2.35, + "learning_rate": 2.3562891291974957e-05, + "loss": 0.3594, + "step": 1835 + }, + { + "epoch": 2.36, + "learning_rate": 2.3505976095617534e-05, + "loss": 0.3177, + "step": 1840 + }, + { + "epoch": 2.36, + "learning_rate": 2.3449060899260104e-05, + "loss": 0.3867, + "step": 1845 + }, + { + "epoch": 2.37, + "learning_rate": 2.3392145702902677e-05, + "loss": 0.3826, + "step": 1850 + }, + { + "epoch": 2.38, + "learning_rate": 2.3335230506545247e-05, + "loss": 0.2243, + "step": 1855 + }, + { + "epoch": 2.38, + "learning_rate": 2.3278315310187824e-05, + "loss": 0.3039, + "step": 1860 + }, + { + "epoch": 2.39, + "learning_rate": 2.3221400113830397e-05, + "loss": 0.3555, + "step": 1865 + }, + { + "epoch": 2.39, + "learning_rate": 2.3164484917472967e-05, + "loss": 0.3321, + "step": 1870 + }, + { + "epoch": 2.4, + "learning_rate": 2.310756972111554e-05, + "loss": 0.3334, + "step": 1875 + }, + { + "epoch": 2.41, + "learning_rate": 2.305065452475811e-05, + "loss": 0.3629, + "step": 1880 + }, + { + "epoch": 2.41, + "learning_rate": 2.2993739328400687e-05, + "loss": 0.2421, + "step": 1885 + }, + { + "epoch": 2.42, + "learning_rate": 2.2936824132043257e-05, + "loss": 0.3204, + "step": 1890 + }, + { + "epoch": 2.43, + "learning_rate": 2.287990893568583e-05, + "loss": 0.3631, + "step": 1895 + }, + { + "epoch": 2.43, + "learning_rate": 2.28229937393284e-05, + "loss": 0.3279, + "step": 1900 + }, + { + "epoch": 2.44, + "learning_rate": 2.2766078542970974e-05, + "loss": 0.3008, + "step": 1905 + }, + { + "epoch": 2.45, + "learning_rate": 2.270916334661355e-05, + "loss": 0.4036, + "step": 1910 + }, + { + "epoch": 2.45, + "learning_rate": 2.265224815025612e-05, + "loss": 0.3201, + "step": 1915 + }, + { + "epoch": 2.46, + "learning_rate": 2.2595332953898694e-05, + "loss": 0.3041, + "step": 1920 + }, + { + "epoch": 2.46, + "learning_rate": 2.2538417757541264e-05, + "loss": 0.3208, + "step": 1925 + }, + { + "epoch": 2.47, + "learning_rate": 2.2481502561183838e-05, + "loss": 0.2943, + "step": 1930 + }, + { + "epoch": 2.48, + "learning_rate": 2.2424587364826408e-05, + "loss": 0.2831, + "step": 1935 + }, + { + "epoch": 2.48, + "learning_rate": 2.2367672168468984e-05, + "loss": 0.3645, + "step": 1940 + }, + { + "epoch": 2.49, + "learning_rate": 2.2310756972111554e-05, + "loss": 0.3532, + "step": 1945 + }, + { + "epoch": 2.5, + "learning_rate": 2.2253841775754128e-05, + "loss": 0.3504, + "step": 1950 + }, + { + "epoch": 2.5, + "learning_rate": 2.21969265793967e-05, + "loss": 0.3465, + "step": 1955 + }, + { + "epoch": 2.51, + "learning_rate": 2.214001138303927e-05, + "loss": 0.358, + "step": 1960 + }, + { + "epoch": 2.52, + "learning_rate": 2.2083096186681848e-05, + "loss": 0.3855, + "step": 1965 + }, + { + "epoch": 2.52, + "learning_rate": 2.2026180990324418e-05, + "loss": 0.2887, + "step": 1970 + }, + { + "epoch": 2.53, + "learning_rate": 2.196926579396699e-05, + "loss": 0.275, + "step": 1975 + }, + { + "epoch": 2.54, + "learning_rate": 2.191235059760956e-05, + "loss": 0.2384, + "step": 1980 + }, + { + "epoch": 2.54, + "learning_rate": 2.1855435401252138e-05, + "loss": 0.2829, + "step": 1985 + }, + { + "epoch": 2.55, + "learning_rate": 2.179852020489471e-05, + "loss": 0.3765, + "step": 1990 + }, + { + "epoch": 2.55, + "learning_rate": 2.174160500853728e-05, + "loss": 0.3509, + "step": 1995 + }, + { + "epoch": 2.56, + "learning_rate": 2.1684689812179855e-05, + "loss": 0.3517, + "step": 2000 + }, + { + "epoch": 2.57, + "learning_rate": 2.1627774615822425e-05, + "loss": 0.3016, + "step": 2005 + }, + { + "epoch": 2.57, + "learning_rate": 2.1570859419465e-05, + "loss": 0.3421, + "step": 2010 + }, + { + "epoch": 2.58, + "learning_rate": 2.151394422310757e-05, + "loss": 0.3054, + "step": 2015 + }, + { + "epoch": 2.59, + "learning_rate": 2.1457029026750145e-05, + "loss": 0.3658, + "step": 2020 + }, + { + "epoch": 2.59, + "learning_rate": 2.1400113830392715e-05, + "loss": 0.2979, + "step": 2025 + }, + { + "epoch": 2.6, + "learning_rate": 2.1343198634035288e-05, + "loss": 0.413, + "step": 2030 + }, + { + "epoch": 2.61, + "learning_rate": 2.1286283437677865e-05, + "loss": 0.3388, + "step": 2035 + }, + { + "epoch": 2.61, + "learning_rate": 2.1229368241320435e-05, + "loss": 0.2758, + "step": 2040 + }, + { + "epoch": 2.62, + "learning_rate": 2.117245304496301e-05, + "loss": 0.2786, + "step": 2045 + }, + { + "epoch": 2.62, + "learning_rate": 2.111553784860558e-05, + "loss": 0.2577, + "step": 2050 + }, + { + "epoch": 2.63, + "learning_rate": 2.1058622652248152e-05, + "loss": 0.26, + "step": 2055 + }, + { + "epoch": 2.64, + "learning_rate": 2.1001707455890722e-05, + "loss": 0.2994, + "step": 2060 + }, + { + "epoch": 2.64, + "learning_rate": 2.09447922595333e-05, + "loss": 0.2211, + "step": 2065 + }, + { + "epoch": 2.65, + "learning_rate": 2.088787706317587e-05, + "loss": 0.3152, + "step": 2070 + }, + { + "epoch": 2.66, + "learning_rate": 2.0830961866818442e-05, + "loss": 0.253, + "step": 2075 + }, + { + "epoch": 2.66, + "learning_rate": 2.0774046670461015e-05, + "loss": 0.3429, + "step": 2080 + }, + { + "epoch": 2.67, + "learning_rate": 2.0717131474103585e-05, + "loss": 0.2717, + "step": 2085 + }, + { + "epoch": 2.68, + "learning_rate": 2.0660216277746162e-05, + "loss": 0.2923, + "step": 2090 + }, + { + "epoch": 2.68, + "learning_rate": 2.0603301081388732e-05, + "loss": 0.2446, + "step": 2095 + }, + { + "epoch": 2.69, + "learning_rate": 2.0546385885031305e-05, + "loss": 0.2661, + "step": 2100 + }, + { + "epoch": 2.7, + "learning_rate": 2.0489470688673875e-05, + "loss": 0.3075, + "step": 2105 + }, + { + "epoch": 2.7, + "learning_rate": 2.0432555492316452e-05, + "loss": 0.3915, + "step": 2110 + }, + { + "epoch": 2.71, + "learning_rate": 2.0375640295959025e-05, + "loss": 0.385, + "step": 2115 + }, + { + "epoch": 2.71, + "learning_rate": 2.0318725099601595e-05, + "loss": 0.3714, + "step": 2120 + }, + { + "epoch": 2.72, + "learning_rate": 2.026180990324417e-05, + "loss": 0.3581, + "step": 2125 + }, + { + "epoch": 2.73, + "learning_rate": 2.020489470688674e-05, + "loss": 0.2439, + "step": 2130 + }, + { + "epoch": 2.73, + "learning_rate": 2.0147979510529316e-05, + "loss": 0.3, + "step": 2135 + }, + { + "epoch": 2.74, + "learning_rate": 2.0091064314171886e-05, + "loss": 0.2996, + "step": 2140 + }, + { + "epoch": 2.75, + "learning_rate": 2.003414911781446e-05, + "loss": 0.305, + "step": 2145 + }, + { + "epoch": 2.75, + "learning_rate": 1.9977233921457032e-05, + "loss": 0.3291, + "step": 2150 + }, + { + "epoch": 2.76, + "learning_rate": 1.9920318725099602e-05, + "loss": 0.2964, + "step": 2155 + }, + { + "epoch": 2.77, + "learning_rate": 1.9863403528742176e-05, + "loss": 0.4112, + "step": 2160 + }, + { + "epoch": 2.77, + "learning_rate": 1.980648833238475e-05, + "loss": 0.3476, + "step": 2165 + }, + { + "epoch": 2.78, + "learning_rate": 1.974957313602732e-05, + "loss": 0.314, + "step": 2170 + }, + { + "epoch": 2.78, + "learning_rate": 1.9692657939669892e-05, + "loss": 0.2829, + "step": 2175 + }, + { + "epoch": 2.79, + "learning_rate": 1.9635742743312466e-05, + "loss": 0.3628, + "step": 2180 + }, + { + "epoch": 2.8, + "learning_rate": 1.957882754695504e-05, + "loss": 0.2601, + "step": 2185 + }, + { + "epoch": 2.8, + "learning_rate": 1.9521912350597613e-05, + "loss": 0.401, + "step": 2190 + }, + { + "epoch": 2.81, + "learning_rate": 1.9464997154240183e-05, + "loss": 0.261, + "step": 2195 + }, + { + "epoch": 2.82, + "learning_rate": 1.9408081957882756e-05, + "loss": 0.3531, + "step": 2200 + }, + { + "epoch": 2.82, + "learning_rate": 1.935116676152533e-05, + "loss": 0.3118, + "step": 2205 + }, + { + "epoch": 2.83, + "learning_rate": 1.92942515651679e-05, + "loss": 0.3498, + "step": 2210 + }, + { + "epoch": 2.84, + "learning_rate": 1.9237336368810473e-05, + "loss": 0.3738, + "step": 2215 + }, + { + "epoch": 2.84, + "learning_rate": 1.9180421172453046e-05, + "loss": 0.2844, + "step": 2220 + }, + { + "epoch": 2.85, + "learning_rate": 1.912350597609562e-05, + "loss": 0.3668, + "step": 2225 + }, + { + "epoch": 2.86, + "learning_rate": 1.9066590779738193e-05, + "loss": 0.4105, + "step": 2230 + }, + { + "epoch": 2.86, + "learning_rate": 1.9009675583380763e-05, + "loss": 0.3562, + "step": 2235 + }, + { + "epoch": 2.87, + "learning_rate": 1.8952760387023336e-05, + "loss": 0.3053, + "step": 2240 + }, + { + "epoch": 2.87, + "learning_rate": 1.889584519066591e-05, + "loss": 0.3124, + "step": 2245 + }, + { + "epoch": 2.88, + "learning_rate": 1.8838929994308483e-05, + "loss": 0.3148, + "step": 2250 + }, + { + "epoch": 2.89, + "learning_rate": 1.8782014797951053e-05, + "loss": 0.2883, + "step": 2255 + }, + { + "epoch": 2.89, + "learning_rate": 1.8725099601593626e-05, + "loss": 0.3433, + "step": 2260 + }, + { + "epoch": 2.9, + "learning_rate": 1.86681844052362e-05, + "loss": 0.343, + "step": 2265 + }, + { + "epoch": 2.91, + "learning_rate": 1.8611269208878773e-05, + "loss": 0.2873, + "step": 2270 + }, + { + "epoch": 2.91, + "learning_rate": 1.8554354012521346e-05, + "loss": 0.3344, + "step": 2275 + }, + { + "epoch": 2.92, + "learning_rate": 1.8497438816163916e-05, + "loss": 0.2587, + "step": 2280 + }, + { + "epoch": 2.93, + "learning_rate": 1.844052361980649e-05, + "loss": 0.3247, + "step": 2285 + }, + { + "epoch": 2.93, + "learning_rate": 1.8383608423449063e-05, + "loss": 0.281, + "step": 2290 + }, + { + "epoch": 2.94, + "learning_rate": 1.8326693227091633e-05, + "loss": 0.2981, + "step": 2295 + }, + { + "epoch": 2.94, + "learning_rate": 1.8269778030734207e-05, + "loss": 0.228, + "step": 2300 + }, + { + "epoch": 2.95, + "learning_rate": 1.821286283437678e-05, + "loss": 0.3926, + "step": 2305 + }, + { + "epoch": 2.96, + "learning_rate": 1.8155947638019353e-05, + "loss": 0.2932, + "step": 2310 + }, + { + "epoch": 2.96, + "learning_rate": 1.8099032441661927e-05, + "loss": 0.364, + "step": 2315 + }, + { + "epoch": 2.97, + "learning_rate": 1.8042117245304497e-05, + "loss": 0.4113, + "step": 2320 + }, + { + "epoch": 2.98, + "learning_rate": 1.798520204894707e-05, + "loss": 0.3103, + "step": 2325 + }, + { + "epoch": 2.98, + "learning_rate": 1.7928286852589643e-05, + "loss": 0.2307, + "step": 2330 + }, + { + "epoch": 2.99, + "learning_rate": 1.7871371656232213e-05, + "loss": 0.2478, + "step": 2335 + }, + { + "epoch": 3.0, + "learning_rate": 1.7814456459874787e-05, + "loss": 0.2912, + "step": 2340 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.861, + "eval_loss": 0.4452311098575592, + "eval_runtime": 61.2046, + "eval_samples_per_second": 163.386, + "eval_steps_per_second": 10.212, + "step": 2343 + }, + { + "epoch": 3.0, + "learning_rate": 1.775754126351736e-05, + "loss": 0.2109, + "step": 2345 + }, + { + "epoch": 3.01, + "learning_rate": 1.7700626067159934e-05, + "loss": 0.2094, + "step": 2350 + }, + { + "epoch": 3.02, + "learning_rate": 1.7643710870802507e-05, + "loss": 0.2467, + "step": 2355 + }, + { + "epoch": 3.02, + "learning_rate": 1.7586795674445077e-05, + "loss": 0.2747, + "step": 2360 + }, + { + "epoch": 3.03, + "learning_rate": 1.752988047808765e-05, + "loss": 0.1656, + "step": 2365 + }, + { + "epoch": 3.03, + "learning_rate": 1.7472965281730224e-05, + "loss": 0.1659, + "step": 2370 + }, + { + "epoch": 3.04, + "learning_rate": 1.7416050085372797e-05, + "loss": 0.2871, + "step": 2375 + }, + { + "epoch": 3.05, + "learning_rate": 1.7359134889015367e-05, + "loss": 0.2369, + "step": 2380 + }, + { + "epoch": 3.05, + "learning_rate": 1.730221969265794e-05, + "loss": 0.2459, + "step": 2385 + }, + { + "epoch": 3.06, + "learning_rate": 1.7245304496300514e-05, + "loss": 0.1826, + "step": 2390 + }, + { + "epoch": 3.07, + "learning_rate": 1.7188389299943087e-05, + "loss": 0.2467, + "step": 2395 + }, + { + "epoch": 3.07, + "learning_rate": 1.713147410358566e-05, + "loss": 0.2196, + "step": 2400 + }, + { + "epoch": 3.08, + "learning_rate": 1.707455890722823e-05, + "loss": 0.2427, + "step": 2405 + }, + { + "epoch": 3.09, + "learning_rate": 1.7017643710870804e-05, + "loss": 0.2632, + "step": 2410 + }, + { + "epoch": 3.09, + "learning_rate": 1.6960728514513377e-05, + "loss": 0.1924, + "step": 2415 + }, + { + "epoch": 3.1, + "learning_rate": 1.6903813318155947e-05, + "loss": 0.2471, + "step": 2420 + }, + { + "epoch": 3.1, + "learning_rate": 1.684689812179852e-05, + "loss": 0.2159, + "step": 2425 + }, + { + "epoch": 3.11, + "learning_rate": 1.6789982925441094e-05, + "loss": 0.2591, + "step": 2430 + }, + { + "epoch": 3.12, + "learning_rate": 1.6733067729083667e-05, + "loss": 0.2674, + "step": 2435 + }, + { + "epoch": 3.12, + "learning_rate": 1.667615253272624e-05, + "loss": 0.2457, + "step": 2440 + }, + { + "epoch": 3.13, + "learning_rate": 1.661923733636881e-05, + "loss": 0.2419, + "step": 2445 + }, + { + "epoch": 3.14, + "learning_rate": 1.6562322140011384e-05, + "loss": 0.1977, + "step": 2450 + }, + { + "epoch": 3.14, + "learning_rate": 1.6505406943653958e-05, + "loss": 0.216, + "step": 2455 + }, + { + "epoch": 3.15, + "learning_rate": 1.6448491747296528e-05, + "loss": 0.2799, + "step": 2460 + }, + { + "epoch": 3.16, + "learning_rate": 1.63915765509391e-05, + "loss": 0.1789, + "step": 2465 + }, + { + "epoch": 3.16, + "learning_rate": 1.6334661354581674e-05, + "loss": 0.2677, + "step": 2470 + }, + { + "epoch": 3.17, + "learning_rate": 1.6277746158224248e-05, + "loss": 0.1893, + "step": 2475 + }, + { + "epoch": 3.18, + "learning_rate": 1.622083096186682e-05, + "loss": 0.1756, + "step": 2480 + }, + { + "epoch": 3.18, + "learning_rate": 1.616391576550939e-05, + "loss": 0.2038, + "step": 2485 + }, + { + "epoch": 3.19, + "learning_rate": 1.6107000569151964e-05, + "loss": 0.1776, + "step": 2490 + }, + { + "epoch": 3.19, + "learning_rate": 1.6050085372794538e-05, + "loss": 0.3071, + "step": 2495 + }, + { + "epoch": 3.2, + "learning_rate": 1.5993170176437108e-05, + "loss": 0.2819, + "step": 2500 + }, + { + "epoch": 3.21, + "learning_rate": 1.593625498007968e-05, + "loss": 0.2425, + "step": 2505 + }, + { + "epoch": 3.21, + "learning_rate": 1.5879339783722255e-05, + "loss": 0.2611, + "step": 2510 + }, + { + "epoch": 3.22, + "learning_rate": 1.5822424587364828e-05, + "loss": 0.1911, + "step": 2515 + }, + { + "epoch": 3.23, + "learning_rate": 1.57655093910074e-05, + "loss": 0.2089, + "step": 2520 + }, + { + "epoch": 3.23, + "learning_rate": 1.5708594194649975e-05, + "loss": 0.2004, + "step": 2525 + }, + { + "epoch": 3.24, + "learning_rate": 1.5651678998292545e-05, + "loss": 0.2162, + "step": 2530 + }, + { + "epoch": 3.25, + "learning_rate": 1.5594763801935118e-05, + "loss": 0.2117, + "step": 2535 + }, + { + "epoch": 3.25, + "learning_rate": 1.553784860557769e-05, + "loss": 0.2401, + "step": 2540 + }, + { + "epoch": 3.26, + "learning_rate": 1.548093340922026e-05, + "loss": 0.2056, + "step": 2545 + }, + { + "epoch": 3.26, + "learning_rate": 1.5424018212862835e-05, + "loss": 0.23, + "step": 2550 + }, + { + "epoch": 3.27, + "learning_rate": 1.5367103016505408e-05, + "loss": 0.2444, + "step": 2555 + }, + { + "epoch": 3.28, + "learning_rate": 1.531018782014798e-05, + "loss": 0.3034, + "step": 2560 + }, + { + "epoch": 3.28, + "learning_rate": 1.5253272623790553e-05, + "loss": 0.1683, + "step": 2565 + }, + { + "epoch": 3.29, + "learning_rate": 1.5196357427433127e-05, + "loss": 0.1557, + "step": 2570 + }, + { + "epoch": 3.3, + "learning_rate": 1.5139442231075698e-05, + "loss": 0.1504, + "step": 2575 + }, + { + "epoch": 3.3, + "learning_rate": 1.508252703471827e-05, + "loss": 0.2048, + "step": 2580 + }, + { + "epoch": 3.31, + "learning_rate": 1.5025611838360843e-05, + "loss": 0.2511, + "step": 2585 + }, + { + "epoch": 3.32, + "learning_rate": 1.4968696642003415e-05, + "loss": 0.204, + "step": 2590 + }, + { + "epoch": 3.32, + "learning_rate": 1.491178144564599e-05, + "loss": 0.2699, + "step": 2595 + }, + { + "epoch": 3.33, + "learning_rate": 1.4854866249288562e-05, + "loss": 0.1463, + "step": 2600 + }, + { + "epoch": 3.34, + "learning_rate": 1.4797951052931133e-05, + "loss": 0.1645, + "step": 2605 + }, + { + "epoch": 3.34, + "learning_rate": 1.4741035856573707e-05, + "loss": 0.1908, + "step": 2610 + }, + { + "epoch": 3.35, + "learning_rate": 1.4684120660216279e-05, + "loss": 0.2191, + "step": 2615 + }, + { + "epoch": 3.35, + "learning_rate": 1.4627205463858852e-05, + "loss": 0.2372, + "step": 2620 + }, + { + "epoch": 3.36, + "learning_rate": 1.4570290267501424e-05, + "loss": 0.2423, + "step": 2625 + }, + { + "epoch": 3.37, + "learning_rate": 1.4513375071143995e-05, + "loss": 0.141, + "step": 2630 + }, + { + "epoch": 3.37, + "learning_rate": 1.4456459874786569e-05, + "loss": 0.2345, + "step": 2635 + }, + { + "epoch": 3.38, + "learning_rate": 1.4399544678429142e-05, + "loss": 0.2508, + "step": 2640 + }, + { + "epoch": 3.39, + "learning_rate": 1.4342629482071715e-05, + "loss": 0.1987, + "step": 2645 + }, + { + "epoch": 3.39, + "learning_rate": 1.4285714285714287e-05, + "loss": 0.1662, + "step": 2650 + }, + { + "epoch": 3.4, + "learning_rate": 1.4228799089356859e-05, + "loss": 0.1729, + "step": 2655 + }, + { + "epoch": 3.41, + "learning_rate": 1.4171883892999432e-05, + "loss": 0.2623, + "step": 2660 + }, + { + "epoch": 3.41, + "learning_rate": 1.4114968696642004e-05, + "loss": 0.2242, + "step": 2665 + }, + { + "epoch": 3.42, + "learning_rate": 1.4058053500284576e-05, + "loss": 0.1906, + "step": 2670 + }, + { + "epoch": 3.42, + "learning_rate": 1.4001138303927149e-05, + "loss": 0.243, + "step": 2675 + }, + { + "epoch": 3.43, + "learning_rate": 1.3944223107569724e-05, + "loss": 0.2251, + "step": 2680 + }, + { + "epoch": 3.44, + "learning_rate": 1.3887307911212296e-05, + "loss": 0.2056, + "step": 2685 + }, + { + "epoch": 3.44, + "learning_rate": 1.3830392714854867e-05, + "loss": 0.2007, + "step": 2690 + }, + { + "epoch": 3.45, + "learning_rate": 1.377347751849744e-05, + "loss": 0.2273, + "step": 2695 + }, + { + "epoch": 3.46, + "learning_rate": 1.3716562322140012e-05, + "loss": 0.2652, + "step": 2700 + }, + { + "epoch": 3.46, + "learning_rate": 1.3659647125782584e-05, + "loss": 0.1765, + "step": 2705 + }, + { + "epoch": 3.47, + "learning_rate": 1.3602731929425157e-05, + "loss": 0.2289, + "step": 2710 + }, + { + "epoch": 3.48, + "learning_rate": 1.354581673306773e-05, + "loss": 0.2561, + "step": 2715 + }, + { + "epoch": 3.48, + "learning_rate": 1.3488901536710304e-05, + "loss": 0.2211, + "step": 2720 + }, + { + "epoch": 3.49, + "learning_rate": 1.3431986340352876e-05, + "loss": 0.1894, + "step": 2725 + }, + { + "epoch": 3.5, + "learning_rate": 1.3375071143995448e-05, + "loss": 0.1795, + "step": 2730 + }, + { + "epoch": 3.5, + "learning_rate": 1.3318155947638021e-05, + "loss": 0.1967, + "step": 2735 + }, + { + "epoch": 3.51, + "learning_rate": 1.3261240751280593e-05, + "loss": 0.2562, + "step": 2740 + }, + { + "epoch": 3.51, + "learning_rate": 1.3204325554923166e-05, + "loss": 0.2178, + "step": 2745 + }, + { + "epoch": 3.52, + "learning_rate": 1.3147410358565738e-05, + "loss": 0.1908, + "step": 2750 + }, + { + "epoch": 3.53, + "learning_rate": 1.309049516220831e-05, + "loss": 0.1789, + "step": 2755 + }, + { + "epoch": 3.53, + "learning_rate": 1.3033579965850883e-05, + "loss": 0.2742, + "step": 2760 + }, + { + "epoch": 3.54, + "learning_rate": 1.2976664769493456e-05, + "loss": 0.2605, + "step": 2765 + }, + { + "epoch": 3.55, + "learning_rate": 1.291974957313603e-05, + "loss": 0.2138, + "step": 2770 + }, + { + "epoch": 3.55, + "learning_rate": 1.2862834376778601e-05, + "loss": 0.1998, + "step": 2775 + }, + { + "epoch": 3.56, + "learning_rate": 1.2805919180421173e-05, + "loss": 0.2454, + "step": 2780 + }, + { + "epoch": 3.57, + "learning_rate": 1.2749003984063746e-05, + "loss": 0.2261, + "step": 2785 + }, + { + "epoch": 3.57, + "learning_rate": 1.2692088787706318e-05, + "loss": 0.1907, + "step": 2790 + }, + { + "epoch": 3.58, + "learning_rate": 1.263517359134889e-05, + "loss": 0.1849, + "step": 2795 + }, + { + "epoch": 3.58, + "learning_rate": 1.2578258394991463e-05, + "loss": 0.2552, + "step": 2800 + }, + { + "epoch": 3.59, + "learning_rate": 1.2521343198634036e-05, + "loss": 0.186, + "step": 2805 + }, + { + "epoch": 3.6, + "learning_rate": 1.246442800227661e-05, + "loss": 0.1753, + "step": 2810 + }, + { + "epoch": 3.6, + "learning_rate": 1.2407512805919181e-05, + "loss": 0.188, + "step": 2815 + }, + { + "epoch": 3.61, + "learning_rate": 1.2350597609561755e-05, + "loss": 0.2063, + "step": 2820 + }, + { + "epoch": 3.62, + "learning_rate": 1.2293682413204327e-05, + "loss": 0.1565, + "step": 2825 + }, + { + "epoch": 3.62, + "learning_rate": 1.2236767216846898e-05, + "loss": 0.185, + "step": 2830 + }, + { + "epoch": 3.63, + "learning_rate": 1.2179852020489472e-05, + "loss": 0.2167, + "step": 2835 + }, + { + "epoch": 3.64, + "learning_rate": 1.2122936824132043e-05, + "loss": 0.1588, + "step": 2840 + }, + { + "epoch": 3.64, + "learning_rate": 1.2066021627774618e-05, + "loss": 0.2709, + "step": 2845 + }, + { + "epoch": 3.65, + "learning_rate": 1.200910643141719e-05, + "loss": 0.2097, + "step": 2850 + }, + { + "epoch": 3.66, + "learning_rate": 1.1952191235059762e-05, + "loss": 0.1697, + "step": 2855 + }, + { + "epoch": 3.66, + "learning_rate": 1.1895276038702335e-05, + "loss": 0.2032, + "step": 2860 + }, + { + "epoch": 3.67, + "learning_rate": 1.1838360842344907e-05, + "loss": 0.2468, + "step": 2865 + }, + { + "epoch": 3.67, + "learning_rate": 1.1781445645987478e-05, + "loss": 0.1398, + "step": 2870 + }, + { + "epoch": 3.68, + "learning_rate": 1.1724530449630052e-05, + "loss": 0.2453, + "step": 2875 + }, + { + "epoch": 3.69, + "learning_rate": 1.1667615253272624e-05, + "loss": 0.2397, + "step": 2880 + }, + { + "epoch": 3.69, + "learning_rate": 1.1610700056915199e-05, + "loss": 0.1835, + "step": 2885 + }, + { + "epoch": 3.7, + "learning_rate": 1.155378486055777e-05, + "loss": 0.2497, + "step": 2890 + }, + { + "epoch": 3.71, + "learning_rate": 1.1496869664200344e-05, + "loss": 0.1499, + "step": 2895 + }, + { + "epoch": 3.71, + "learning_rate": 1.1439954467842915e-05, + "loss": 0.2455, + "step": 2900 + }, + { + "epoch": 3.72, + "learning_rate": 1.1383039271485487e-05, + "loss": 0.2016, + "step": 2905 + }, + { + "epoch": 3.73, + "learning_rate": 1.132612407512806e-05, + "loss": 0.2249, + "step": 2910 + }, + { + "epoch": 3.73, + "learning_rate": 1.1269208878770632e-05, + "loss": 0.1286, + "step": 2915 + }, + { + "epoch": 3.74, + "learning_rate": 1.1212293682413204e-05, + "loss": 0.2297, + "step": 2920 + }, + { + "epoch": 3.74, + "learning_rate": 1.1155378486055777e-05, + "loss": 0.1435, + "step": 2925 + }, + { + "epoch": 3.75, + "learning_rate": 1.109846328969835e-05, + "loss": 0.1694, + "step": 2930 + }, + { + "epoch": 3.76, + "learning_rate": 1.1041548093340924e-05, + "loss": 0.2167, + "step": 2935 + }, + { + "epoch": 3.76, + "learning_rate": 1.0984632896983496e-05, + "loss": 0.1979, + "step": 2940 + }, + { + "epoch": 3.77, + "learning_rate": 1.0927717700626069e-05, + "loss": 0.1548, + "step": 2945 + }, + { + "epoch": 3.78, + "learning_rate": 1.087080250426864e-05, + "loss": 0.2188, + "step": 2950 + }, + { + "epoch": 3.78, + "learning_rate": 1.0813887307911212e-05, + "loss": 0.2313, + "step": 2955 + }, + { + "epoch": 3.79, + "learning_rate": 1.0756972111553786e-05, + "loss": 0.2211, + "step": 2960 + }, + { + "epoch": 3.8, + "learning_rate": 1.0700056915196357e-05, + "loss": 0.1612, + "step": 2965 + }, + { + "epoch": 3.8, + "learning_rate": 1.0643141718838932e-05, + "loss": 0.2125, + "step": 2970 + }, + { + "epoch": 3.81, + "learning_rate": 1.0586226522481504e-05, + "loss": 0.206, + "step": 2975 + }, + { + "epoch": 3.82, + "learning_rate": 1.0529311326124076e-05, + "loss": 0.2112, + "step": 2980 + }, + { + "epoch": 3.82, + "learning_rate": 1.047239612976665e-05, + "loss": 0.1762, + "step": 2985 + }, + { + "epoch": 3.83, + "learning_rate": 1.0415480933409221e-05, + "loss": 0.169, + "step": 2990 + }, + { + "epoch": 3.83, + "learning_rate": 1.0358565737051793e-05, + "loss": 0.2013, + "step": 2995 + }, + { + "epoch": 3.84, + "learning_rate": 1.0301650540694366e-05, + "loss": 0.1734, + "step": 3000 + }, + { + "epoch": 3.85, + "learning_rate": 1.0244735344336938e-05, + "loss": 0.215, + "step": 3005 + }, + { + "epoch": 3.85, + "learning_rate": 1.0187820147979513e-05, + "loss": 0.2166, + "step": 3010 + }, + { + "epoch": 3.86, + "learning_rate": 1.0130904951622084e-05, + "loss": 0.2166, + "step": 3015 + }, + { + "epoch": 3.87, + "learning_rate": 1.0073989755264658e-05, + "loss": 0.1942, + "step": 3020 + }, + { + "epoch": 3.87, + "learning_rate": 1.001707455890723e-05, + "loss": 0.1952, + "step": 3025 + }, + { + "epoch": 3.88, + "learning_rate": 9.960159362549801e-06, + "loss": 0.1974, + "step": 3030 + }, + { + "epoch": 3.89, + "learning_rate": 9.903244166192375e-06, + "loss": 0.2231, + "step": 3035 + }, + { + "epoch": 3.89, + "learning_rate": 9.846328969834946e-06, + "loss": 0.2053, + "step": 3040 + }, + { + "epoch": 3.9, + "learning_rate": 9.78941377347752e-06, + "loss": 0.1894, + "step": 3045 + }, + { + "epoch": 3.9, + "learning_rate": 9.732498577120091e-06, + "loss": 0.2454, + "step": 3050 + }, + { + "epoch": 3.91, + "learning_rate": 9.675583380762665e-06, + "loss": 0.1853, + "step": 3055 + }, + { + "epoch": 3.92, + "learning_rate": 9.618668184405236e-06, + "loss": 0.2468, + "step": 3060 + }, + { + "epoch": 3.92, + "learning_rate": 9.56175298804781e-06, + "loss": 0.1915, + "step": 3065 + }, + { + "epoch": 3.93, + "learning_rate": 9.504837791690381e-06, + "loss": 0.2251, + "step": 3070 + }, + { + "epoch": 3.94, + "learning_rate": 9.447922595332955e-06, + "loss": 0.1638, + "step": 3075 + }, + { + "epoch": 3.94, + "learning_rate": 9.391007398975526e-06, + "loss": 0.16, + "step": 3080 + }, + { + "epoch": 3.95, + "learning_rate": 9.3340922026181e-06, + "loss": 0.1759, + "step": 3085 + }, + { + "epoch": 3.96, + "learning_rate": 9.277177006260673e-06, + "loss": 0.2079, + "step": 3090 + }, + { + "epoch": 3.96, + "learning_rate": 9.220261809903245e-06, + "loss": 0.22, + "step": 3095 + }, + { + "epoch": 3.97, + "learning_rate": 9.163346613545817e-06, + "loss": 0.2352, + "step": 3100 + }, + { + "epoch": 3.98, + "learning_rate": 9.10643141718839e-06, + "loss": 0.1975, + "step": 3105 + }, + { + "epoch": 3.98, + "learning_rate": 9.049516220830963e-06, + "loss": 0.2027, + "step": 3110 + }, + { + "epoch": 3.99, + "learning_rate": 8.992601024473535e-06, + "loss": 0.1782, + "step": 3115 + }, + { + "epoch": 3.99, + "learning_rate": 8.935685828116107e-06, + "loss": 0.2234, + "step": 3120 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8679, + "eval_loss": 0.43295106291770935, + "eval_runtime": 60.931, + "eval_samples_per_second": 164.12, + "eval_steps_per_second": 10.257, + "step": 3124 + }, + { + "epoch": 4.0, + "learning_rate": 8.87877063175868e-06, + "loss": 0.1934, + "step": 3125 + }, + { + "epoch": 4.01, + "learning_rate": 8.821855435401253e-06, + "loss": 0.1317, + "step": 3130 + }, + { + "epoch": 4.01, + "learning_rate": 8.764940239043825e-06, + "loss": 0.2049, + "step": 3135 + }, + { + "epoch": 4.02, + "learning_rate": 8.708025042686399e-06, + "loss": 0.1348, + "step": 3140 + }, + { + "epoch": 4.03, + "learning_rate": 8.65110984632897e-06, + "loss": 0.1759, + "step": 3145 + }, + { + "epoch": 4.03, + "learning_rate": 8.594194649971544e-06, + "loss": 0.1538, + "step": 3150 + }, + { + "epoch": 4.04, + "learning_rate": 8.537279453614115e-06, + "loss": 0.1096, + "step": 3155 + }, + { + "epoch": 4.05, + "learning_rate": 8.480364257256689e-06, + "loss": 0.1689, + "step": 3160 + }, + { + "epoch": 4.05, + "learning_rate": 8.42344906089926e-06, + "loss": 0.1647, + "step": 3165 + }, + { + "epoch": 4.06, + "learning_rate": 8.366533864541834e-06, + "loss": 0.1881, + "step": 3170 + }, + { + "epoch": 4.07, + "learning_rate": 8.309618668184405e-06, + "loss": 0.1345, + "step": 3175 + }, + { + "epoch": 4.07, + "learning_rate": 8.252703471826979e-06, + "loss": 0.134, + "step": 3180 + }, + { + "epoch": 4.08, + "learning_rate": 8.19578827546955e-06, + "loss": 0.1413, + "step": 3185 + }, + { + "epoch": 4.08, + "learning_rate": 8.138873079112124e-06, + "loss": 0.1382, + "step": 3190 + }, + { + "epoch": 4.09, + "learning_rate": 8.081957882754696e-06, + "loss": 0.1666, + "step": 3195 + }, + { + "epoch": 4.1, + "learning_rate": 8.025042686397269e-06, + "loss": 0.1115, + "step": 3200 + }, + { + "epoch": 4.1, + "learning_rate": 7.96812749003984e-06, + "loss": 0.1575, + "step": 3205 + }, + { + "epoch": 4.11, + "learning_rate": 7.911212293682414e-06, + "loss": 0.1469, + "step": 3210 + }, + { + "epoch": 4.12, + "learning_rate": 7.854297097324987e-06, + "loss": 0.1367, + "step": 3215 + }, + { + "epoch": 4.12, + "learning_rate": 7.797381900967559e-06, + "loss": 0.1432, + "step": 3220 + }, + { + "epoch": 4.13, + "learning_rate": 7.74046670461013e-06, + "loss": 0.1375, + "step": 3225 + }, + { + "epoch": 4.14, + "learning_rate": 7.683551508252704e-06, + "loss": 0.1574, + "step": 3230 + }, + { + "epoch": 4.14, + "learning_rate": 7.626636311895277e-06, + "loss": 0.1289, + "step": 3235 + }, + { + "epoch": 4.15, + "learning_rate": 7.569721115537849e-06, + "loss": 0.1744, + "step": 3240 + }, + { + "epoch": 4.15, + "learning_rate": 7.512805919180422e-06, + "loss": 0.1705, + "step": 3245 + }, + { + "epoch": 4.16, + "learning_rate": 7.455890722822995e-06, + "loss": 0.1719, + "step": 3250 + }, + { + "epoch": 4.17, + "learning_rate": 7.398975526465567e-06, + "loss": 0.1454, + "step": 3255 + }, + { + "epoch": 4.17, + "learning_rate": 7.342060330108139e-06, + "loss": 0.1038, + "step": 3260 + }, + { + "epoch": 4.18, + "learning_rate": 7.285145133750712e-06, + "loss": 0.1544, + "step": 3265 + }, + { + "epoch": 4.19, + "learning_rate": 7.228229937393284e-06, + "loss": 0.1299, + "step": 3270 + }, + { + "epoch": 4.19, + "learning_rate": 7.171314741035858e-06, + "loss": 0.1465, + "step": 3275 + }, + { + "epoch": 4.2, + "learning_rate": 7.114399544678429e-06, + "loss": 0.1236, + "step": 3280 + }, + { + "epoch": 4.21, + "learning_rate": 7.057484348321002e-06, + "loss": 0.1329, + "step": 3285 + }, + { + "epoch": 4.21, + "learning_rate": 7.0005691519635745e-06, + "loss": 0.1716, + "step": 3290 + }, + { + "epoch": 4.22, + "learning_rate": 6.943653955606148e-06, + "loss": 0.1225, + "step": 3295 + }, + { + "epoch": 4.23, + "learning_rate": 6.88673875924872e-06, + "loss": 0.1124, + "step": 3300 + }, + { + "epoch": 4.23, + "learning_rate": 6.829823562891292e-06, + "loss": 0.1425, + "step": 3305 + }, + { + "epoch": 4.24, + "learning_rate": 6.772908366533865e-06, + "loss": 0.1209, + "step": 3310 + }, + { + "epoch": 4.24, + "learning_rate": 6.715993170176438e-06, + "loss": 0.1293, + "step": 3315 + }, + { + "epoch": 4.25, + "learning_rate": 6.6590779738190105e-06, + "loss": 0.1592, + "step": 3320 + }, + { + "epoch": 4.26, + "learning_rate": 6.602162777461583e-06, + "loss": 0.1525, + "step": 3325 + }, + { + "epoch": 4.26, + "learning_rate": 6.545247581104155e-06, + "loss": 0.1993, + "step": 3330 + }, + { + "epoch": 4.27, + "learning_rate": 6.488332384746728e-06, + "loss": 0.1312, + "step": 3335 + }, + { + "epoch": 4.28, + "learning_rate": 6.431417188389301e-06, + "loss": 0.1318, + "step": 3340 + }, + { + "epoch": 4.28, + "learning_rate": 6.374501992031873e-06, + "loss": 0.1576, + "step": 3345 + }, + { + "epoch": 4.29, + "learning_rate": 6.317586795674445e-06, + "loss": 0.1417, + "step": 3350 + }, + { + "epoch": 4.3, + "learning_rate": 6.260671599317018e-06, + "loss": 0.1696, + "step": 3355 + }, + { + "epoch": 4.3, + "learning_rate": 6.203756402959591e-06, + "loss": 0.1711, + "step": 3360 + }, + { + "epoch": 4.31, + "learning_rate": 6.146841206602163e-06, + "loss": 0.187, + "step": 3365 + }, + { + "epoch": 4.31, + "learning_rate": 6.089926010244736e-06, + "loss": 0.1165, + "step": 3370 + }, + { + "epoch": 4.32, + "learning_rate": 6.033010813887309e-06, + "loss": 0.1282, + "step": 3375 + }, + { + "epoch": 4.33, + "learning_rate": 5.976095617529881e-06, + "loss": 0.1528, + "step": 3380 + }, + { + "epoch": 4.33, + "learning_rate": 5.919180421172453e-06, + "loss": 0.1477, + "step": 3385 + }, + { + "epoch": 4.34, + "learning_rate": 5.862265224815026e-06, + "loss": 0.1666, + "step": 3390 + }, + { + "epoch": 4.35, + "learning_rate": 5.805350028457599e-06, + "loss": 0.1881, + "step": 3395 + }, + { + "epoch": 4.35, + "learning_rate": 5.748434832100172e-06, + "loss": 0.1377, + "step": 3400 + }, + { + "epoch": 4.36, + "learning_rate": 5.6915196357427435e-06, + "loss": 0.16, + "step": 3405 + }, + { + "epoch": 4.37, + "learning_rate": 5.634604439385316e-06, + "loss": 0.1488, + "step": 3410 + }, + { + "epoch": 4.37, + "learning_rate": 5.577689243027889e-06, + "loss": 0.143, + "step": 3415 + }, + { + "epoch": 4.38, + "learning_rate": 5.520774046670462e-06, + "loss": 0.1414, + "step": 3420 + }, + { + "epoch": 4.39, + "learning_rate": 5.4638588503130345e-06, + "loss": 0.1979, + "step": 3425 + }, + { + "epoch": 4.39, + "learning_rate": 5.406943653955606e-06, + "loss": 0.1419, + "step": 3430 + }, + { + "epoch": 4.4, + "learning_rate": 5.350028457598179e-06, + "loss": 0.1216, + "step": 3435 + }, + { + "epoch": 4.4, + "learning_rate": 5.293113261240752e-06, + "loss": 0.1433, + "step": 3440 + }, + { + "epoch": 4.41, + "learning_rate": 5.236198064883325e-06, + "loss": 0.1615, + "step": 3445 + }, + { + "epoch": 4.42, + "learning_rate": 5.179282868525896e-06, + "loss": 0.1652, + "step": 3450 + }, + { + "epoch": 4.42, + "learning_rate": 5.122367672168469e-06, + "loss": 0.2001, + "step": 3455 + }, + { + "epoch": 4.43, + "learning_rate": 5.065452475811042e-06, + "loss": 0.1397, + "step": 3460 + }, + { + "epoch": 4.44, + "learning_rate": 5.008537279453615e-06, + "loss": 0.1599, + "step": 3465 + }, + { + "epoch": 4.44, + "learning_rate": 4.951622083096187e-06, + "loss": 0.1167, + "step": 3470 + }, + { + "epoch": 4.45, + "learning_rate": 4.89470688673876e-06, + "loss": 0.1473, + "step": 3475 + }, + { + "epoch": 4.46, + "learning_rate": 4.837791690381332e-06, + "loss": 0.1486, + "step": 3480 + }, + { + "epoch": 4.46, + "learning_rate": 4.780876494023905e-06, + "loss": 0.1604, + "step": 3485 + }, + { + "epoch": 4.47, + "learning_rate": 4.723961297666477e-06, + "loss": 0.1364, + "step": 3490 + }, + { + "epoch": 4.47, + "learning_rate": 4.66704610130905e-06, + "loss": 0.183, + "step": 3495 + }, + { + "epoch": 4.48, + "learning_rate": 4.6101309049516225e-06, + "loss": 0.1803, + "step": 3500 + }, + { + "epoch": 4.49, + "learning_rate": 4.553215708594195e-06, + "loss": 0.1405, + "step": 3505 + }, + { + "epoch": 4.49, + "learning_rate": 4.4963005122367675e-06, + "loss": 0.1436, + "step": 3510 + }, + { + "epoch": 4.5, + "learning_rate": 4.43938531587934e-06, + "loss": 0.1442, + "step": 3515 + }, + { + "epoch": 4.51, + "learning_rate": 4.382470119521913e-06, + "loss": 0.0979, + "step": 3520 + }, + { + "epoch": 4.51, + "learning_rate": 4.325554923164485e-06, + "loss": 0.1218, + "step": 3525 + }, + { + "epoch": 4.52, + "learning_rate": 4.268639726807058e-06, + "loss": 0.1965, + "step": 3530 + }, + { + "epoch": 4.53, + "learning_rate": 4.21172453044963e-06, + "loss": 0.161, + "step": 3535 + }, + { + "epoch": 4.53, + "learning_rate": 4.154809334092203e-06, + "loss": 0.1378, + "step": 3540 + }, + { + "epoch": 4.54, + "learning_rate": 4.097894137734775e-06, + "loss": 0.1333, + "step": 3545 + }, + { + "epoch": 4.55, + "learning_rate": 4.040978941377348e-06, + "loss": 0.137, + "step": 3550 + }, + { + "epoch": 4.55, + "learning_rate": 3.98406374501992e-06, + "loss": 0.1652, + "step": 3555 + }, + { + "epoch": 4.56, + "learning_rate": 3.927148548662494e-06, + "loss": 0.1655, + "step": 3560 + }, + { + "epoch": 4.56, + "learning_rate": 3.870233352305065e-06, + "loss": 0.1791, + "step": 3565 + }, + { + "epoch": 4.57, + "learning_rate": 3.8133181559476383e-06, + "loss": 0.128, + "step": 3570 + }, + { + "epoch": 4.58, + "learning_rate": 3.756402959590211e-06, + "loss": 0.1567, + "step": 3575 + }, + { + "epoch": 4.58, + "learning_rate": 3.6994877632327834e-06, + "loss": 0.1564, + "step": 3580 + }, + { + "epoch": 4.59, + "learning_rate": 3.642572566875356e-06, + "loss": 0.1369, + "step": 3585 + }, + { + "epoch": 4.6, + "learning_rate": 3.585657370517929e-06, + "loss": 0.176, + "step": 3590 + }, + { + "epoch": 4.6, + "learning_rate": 3.528742174160501e-06, + "loss": 0.1456, + "step": 3595 + }, + { + "epoch": 4.61, + "learning_rate": 3.471826977803074e-06, + "loss": 0.1346, + "step": 3600 + }, + { + "epoch": 4.62, + "learning_rate": 3.414911781445646e-06, + "loss": 0.1565, + "step": 3605 + }, + { + "epoch": 4.62, + "learning_rate": 3.357996585088219e-06, + "loss": 0.1792, + "step": 3610 + }, + { + "epoch": 4.63, + "learning_rate": 3.3010813887307915e-06, + "loss": 0.1411, + "step": 3615 + }, + { + "epoch": 4.63, + "learning_rate": 3.244166192373364e-06, + "loss": 0.1847, + "step": 3620 + }, + { + "epoch": 4.64, + "learning_rate": 3.1872509960159366e-06, + "loss": 0.19, + "step": 3625 + }, + { + "epoch": 4.65, + "learning_rate": 3.130335799658509e-06, + "loss": 0.1474, + "step": 3630 + }, + { + "epoch": 4.65, + "learning_rate": 3.0734206033010816e-06, + "loss": 0.1869, + "step": 3635 + }, + { + "epoch": 4.66, + "learning_rate": 3.0165054069436546e-06, + "loss": 0.1128, + "step": 3640 + }, + { + "epoch": 4.67, + "learning_rate": 2.9595902105862267e-06, + "loss": 0.1203, + "step": 3645 + }, + { + "epoch": 4.67, + "learning_rate": 2.9026750142287997e-06, + "loss": 0.1286, + "step": 3650 + }, + { + "epoch": 4.68, + "learning_rate": 2.8457598178713718e-06, + "loss": 0.12, + "step": 3655 + }, + { + "epoch": 4.69, + "learning_rate": 2.7888446215139443e-06, + "loss": 0.095, + "step": 3660 + }, + { + "epoch": 4.69, + "learning_rate": 2.7319294251565172e-06, + "loss": 0.1002, + "step": 3665 + }, + { + "epoch": 4.7, + "learning_rate": 2.6750142287990894e-06, + "loss": 0.1256, + "step": 3670 + }, + { + "epoch": 4.71, + "learning_rate": 2.6180990324416623e-06, + "loss": 0.1148, + "step": 3675 + }, + { + "epoch": 4.71, + "learning_rate": 2.5611838360842344e-06, + "loss": 0.1646, + "step": 3680 + }, + { + "epoch": 4.72, + "learning_rate": 2.5042686397268074e-06, + "loss": 0.1083, + "step": 3685 + }, + { + "epoch": 4.72, + "learning_rate": 2.44735344336938e-06, + "loss": 0.1251, + "step": 3690 + }, + { + "epoch": 4.73, + "learning_rate": 2.3904382470119524e-06, + "loss": 0.1242, + "step": 3695 + }, + { + "epoch": 4.74, + "learning_rate": 2.333523050654525e-06, + "loss": 0.1671, + "step": 3700 + }, + { + "epoch": 4.74, + "learning_rate": 2.2766078542970975e-06, + "loss": 0.1442, + "step": 3705 + }, + { + "epoch": 4.75, + "learning_rate": 2.21969265793967e-06, + "loss": 0.1694, + "step": 3710 + }, + { + "epoch": 4.76, + "learning_rate": 2.1627774615822426e-06, + "loss": 0.1632, + "step": 3715 + }, + { + "epoch": 4.76, + "learning_rate": 2.105862265224815e-06, + "loss": 0.1738, + "step": 3720 + }, + { + "epoch": 4.77, + "learning_rate": 2.0489470688673876e-06, + "loss": 0.1062, + "step": 3725 + }, + { + "epoch": 4.78, + "learning_rate": 1.99203187250996e-06, + "loss": 0.1395, + "step": 3730 + }, + { + "epoch": 4.78, + "learning_rate": 1.9351166761525327e-06, + "loss": 0.1321, + "step": 3735 + }, + { + "epoch": 4.79, + "learning_rate": 1.8782014797951054e-06, + "loss": 0.1251, + "step": 3740 + }, + { + "epoch": 4.79, + "learning_rate": 1.821286283437678e-06, + "loss": 0.1499, + "step": 3745 + }, + { + "epoch": 4.8, + "learning_rate": 1.7643710870802505e-06, + "loss": 0.1338, + "step": 3750 + }, + { + "epoch": 4.81, + "learning_rate": 1.707455890722823e-06, + "loss": 0.1873, + "step": 3755 + }, + { + "epoch": 4.81, + "learning_rate": 1.6505406943653958e-06, + "loss": 0.1352, + "step": 3760 + }, + { + "epoch": 4.82, + "learning_rate": 1.5936254980079683e-06, + "loss": 0.1673, + "step": 3765 + }, + { + "epoch": 4.83, + "learning_rate": 1.5367103016505408e-06, + "loss": 0.1705, + "step": 3770 + }, + { + "epoch": 4.83, + "learning_rate": 1.4797951052931133e-06, + "loss": 0.1145, + "step": 3775 + }, + { + "epoch": 4.84, + "learning_rate": 1.4228799089356859e-06, + "loss": 0.0999, + "step": 3780 + }, + { + "epoch": 4.85, + "learning_rate": 1.3659647125782586e-06, + "loss": 0.1012, + "step": 3785 + }, + { + "epoch": 4.85, + "learning_rate": 1.3090495162208312e-06, + "loss": 0.1677, + "step": 3790 + }, + { + "epoch": 4.86, + "learning_rate": 1.2521343198634037e-06, + "loss": 0.169, + "step": 3795 + }, + { + "epoch": 4.87, + "learning_rate": 1.1952191235059762e-06, + "loss": 0.1844, + "step": 3800 + }, + { + "epoch": 4.87, + "learning_rate": 1.1383039271485487e-06, + "loss": 0.1701, + "step": 3805 + }, + { + "epoch": 4.88, + "learning_rate": 1.0813887307911213e-06, + "loss": 0.1319, + "step": 3810 + }, + { + "epoch": 4.88, + "learning_rate": 1.0244735344336938e-06, + "loss": 0.1546, + "step": 3815 + }, + { + "epoch": 4.89, + "learning_rate": 9.675583380762663e-07, + "loss": 0.1579, + "step": 3820 + }, + { + "epoch": 4.9, + "learning_rate": 9.10643141718839e-07, + "loss": 0.1308, + "step": 3825 + }, + { + "epoch": 4.9, + "learning_rate": 8.537279453614115e-07, + "loss": 0.1453, + "step": 3830 + }, + { + "epoch": 4.91, + "learning_rate": 7.968127490039841e-07, + "loss": 0.169, + "step": 3835 + }, + { + "epoch": 4.92, + "learning_rate": 7.398975526465567e-07, + "loss": 0.1191, + "step": 3840 + }, + { + "epoch": 4.92, + "learning_rate": 6.829823562891293e-07, + "loss": 0.152, + "step": 3845 + }, + { + "epoch": 4.93, + "learning_rate": 6.260671599317018e-07, + "loss": 0.1275, + "step": 3850 + }, + { + "epoch": 4.94, + "learning_rate": 5.691519635742744e-07, + "loss": 0.1409, + "step": 3855 + }, + { + "epoch": 4.94, + "learning_rate": 5.122367672168469e-07, + "loss": 0.1332, + "step": 3860 + }, + { + "epoch": 4.95, + "learning_rate": 4.553215708594195e-07, + "loss": 0.163, + "step": 3865 + }, + { + "epoch": 4.95, + "learning_rate": 3.9840637450199207e-07, + "loss": 0.1564, + "step": 3870 + }, + { + "epoch": 4.96, + "learning_rate": 3.4149117814456466e-07, + "loss": 0.2202, + "step": 3875 + }, + { + "epoch": 4.97, + "learning_rate": 2.845759817871372e-07, + "loss": 0.1769, + "step": 3880 + }, + { + "epoch": 4.97, + "learning_rate": 2.2766078542970974e-07, + "loss": 0.1343, + "step": 3885 + }, + { + "epoch": 4.98, + "learning_rate": 1.7074558907228233e-07, + "loss": 0.1481, + "step": 3890 + }, + { + "epoch": 4.99, + "learning_rate": 1.1383039271485487e-07, + "loss": 0.1591, + "step": 3895 + }, + { + "epoch": 4.99, + "learning_rate": 5.6915196357427436e-08, + "loss": 0.1983, + "step": 3900 + }, + { + "epoch": 5.0, + "learning_rate": 0.0, + "loss": 0.121, + "step": 3905 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.8735, + "eval_loss": 0.42226287722587585, + "eval_runtime": 60.1747, + "eval_samples_per_second": 166.183, + "eval_steps_per_second": 10.386, + "step": 3905 + }, + { + "epoch": 5.0, + "step": 3905, + "total_flos": 6.230614598311477e+18, + "train_loss": 0.0, + "train_runtime": 0.1997, + "train_samples_per_second": 1252007.126, + "train_steps_per_second": 19556.351 + } + ], + "max_steps": 3905, + "num_train_epochs": 5, + "total_flos": 6.230614598311477e+18, + "trial_name": null, + "trial_params": null +}