{ "best_metric": 0.974025974025974, "best_model_checkpoint": "resnet-50-resnet50_fashion/checkpoint-2077", "epoch": 49.66996699669967, "global_step": 4300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12, "learning_rate": 1.1627906976744187e-05, "loss": 0.6908, "step": 10 }, { "epoch": 0.23, "learning_rate": 2.3255813953488374e-05, "loss": 0.6904, "step": 20 }, { "epoch": 0.35, "learning_rate": 3.488372093023256e-05, "loss": 0.6864, "step": 30 }, { "epoch": 0.46, "learning_rate": 4.651162790697675e-05, "loss": 0.6821, "step": 40 }, { "epoch": 0.58, "learning_rate": 4.991778247592201e-05, "loss": 0.6771, "step": 50 }, { "epoch": 0.69, "learning_rate": 4.9800328870096314e-05, "loss": 0.6719, "step": 60 }, { "epoch": 0.81, "learning_rate": 4.968287526427062e-05, "loss": 0.6696, "step": 70 }, { "epoch": 0.92, "learning_rate": 4.956542165844491e-05, "loss": 0.6532, "step": 80 }, { "epoch": 0.99, "eval_accuracy": 0.634508348794063, "eval_loss": 0.6780841946601868, "eval_runtime": 8.8727, "eval_samples_per_second": 60.748, "eval_steps_per_second": 7.664, "step": 86 }, { "epoch": 1.04, "learning_rate": 4.944796805261922e-05, "loss": 0.6442, "step": 90 }, { "epoch": 1.16, "learning_rate": 4.933051444679352e-05, "loss": 0.6432, "step": 100 }, { "epoch": 1.27, "learning_rate": 4.9213060840967814e-05, "loss": 0.64, "step": 110 }, { "epoch": 1.39, "learning_rate": 4.9095607235142123e-05, "loss": 0.6133, "step": 120 }, { "epoch": 1.5, "learning_rate": 4.897815362931642e-05, "loss": 0.5955, "step": 130 }, { "epoch": 1.62, "learning_rate": 4.886070002349072e-05, "loss": 0.5998, "step": 140 }, { "epoch": 1.73, "learning_rate": 4.8743246417665025e-05, "loss": 0.5724, "step": 150 }, { "epoch": 1.85, "learning_rate": 4.862579281183933e-05, "loss": 0.573, "step": 160 }, { "epoch": 1.96, "learning_rate": 4.850833920601362e-05, "loss": 0.5407, "step": 170 }, { "epoch": 2.0, "eval_accuracy": 0.8589981447124304, "eval_loss": 0.5222358107566833, "eval_runtime": 8.0811, "eval_samples_per_second": 66.699, "eval_steps_per_second": 8.415, "step": 173 }, { "epoch": 2.08, "learning_rate": 4.839088560018793e-05, "loss": 0.5141, "step": 180 }, { "epoch": 2.19, "learning_rate": 4.827343199436223e-05, "loss": 0.5092, "step": 190 }, { "epoch": 2.31, "learning_rate": 4.815597838853653e-05, "loss": 0.5005, "step": 200 }, { "epoch": 2.43, "learning_rate": 4.8038524782710834e-05, "loss": 0.4718, "step": 210 }, { "epoch": 2.54, "learning_rate": 4.7921071176885137e-05, "loss": 0.4495, "step": 220 }, { "epoch": 2.66, "learning_rate": 4.780361757105943e-05, "loss": 0.4403, "step": 230 }, { "epoch": 2.77, "learning_rate": 4.7686163965233735e-05, "loss": 0.438, "step": 240 }, { "epoch": 2.89, "learning_rate": 4.756871035940804e-05, "loss": 0.4086, "step": 250 }, { "epoch": 2.99, "eval_accuracy": 0.8923933209647495, "eval_loss": 0.3594764173030853, "eval_runtime": 8.0206, "eval_samples_per_second": 67.202, "eval_steps_per_second": 8.478, "step": 259 }, { "epoch": 3.0, "learning_rate": 4.7451256753582334e-05, "loss": 0.4199, "step": 260 }, { "epoch": 3.12, "learning_rate": 4.7333803147756636e-05, "loss": 0.3936, "step": 270 }, { "epoch": 3.23, "learning_rate": 4.721634954193094e-05, "loss": 0.3951, "step": 280 }, { "epoch": 3.35, "learning_rate": 4.709889593610524e-05, "loss": 0.365, "step": 290 }, { "epoch": 3.47, "learning_rate": 4.698144233027954e-05, "loss": 0.4232, "step": 300 }, { "epoch": 3.58, "learning_rate": 4.686398872445385e-05, "loss": 0.3703, "step": 310 }, { "epoch": 3.7, "learning_rate": 4.674653511862814e-05, "loss": 0.3714, "step": 320 }, { "epoch": 3.81, "learning_rate": 4.6629081512802445e-05, "loss": 0.3859, "step": 330 }, { "epoch": 3.93, "learning_rate": 4.651162790697675e-05, "loss": 0.3449, "step": 340 }, { "epoch": 4.0, "eval_accuracy": 0.9183673469387755, "eval_loss": 0.2615828514099121, "eval_runtime": 7.9757, "eval_samples_per_second": 67.58, "eval_steps_per_second": 8.526, "step": 346 }, { "epoch": 4.04, "learning_rate": 4.639417430115105e-05, "loss": 0.356, "step": 350 }, { "epoch": 4.16, "learning_rate": 4.627672069532535e-05, "loss": 0.3595, "step": 360 }, { "epoch": 4.27, "learning_rate": 4.615926708949965e-05, "loss": 0.3519, "step": 370 }, { "epoch": 4.39, "learning_rate": 4.604181348367395e-05, "loss": 0.3428, "step": 380 }, { "epoch": 4.5, "learning_rate": 4.592435987784825e-05, "loss": 0.332, "step": 390 }, { "epoch": 4.62, "learning_rate": 4.580690627202255e-05, "loss": 0.3621, "step": 400 }, { "epoch": 4.74, "learning_rate": 4.568945266619685e-05, "loss": 0.3565, "step": 410 }, { "epoch": 4.85, "learning_rate": 4.5571999060371156e-05, "loss": 0.3075, "step": 420 }, { "epoch": 4.97, "learning_rate": 4.545454545454546e-05, "loss": 0.3518, "step": 430 }, { "epoch": 4.99, "eval_accuracy": 0.9443413729128015, "eval_loss": 0.22880208492279053, "eval_runtime": 7.8304, "eval_samples_per_second": 68.834, "eval_steps_per_second": 8.684, "step": 432 }, { "epoch": 5.08, "learning_rate": 4.533709184871976e-05, "loss": 0.3537, "step": 440 }, { "epoch": 5.2, "learning_rate": 4.521963824289406e-05, "loss": 0.2907, "step": 450 }, { "epoch": 5.31, "learning_rate": 4.510218463706836e-05, "loss": 0.3318, "step": 460 }, { "epoch": 5.43, "learning_rate": 4.498473103124266e-05, "loss": 0.3502, "step": 470 }, { "epoch": 5.54, "learning_rate": 4.4867277425416965e-05, "loss": 0.3319, "step": 480 }, { "epoch": 5.66, "learning_rate": 4.474982381959126e-05, "loss": 0.3241, "step": 490 }, { "epoch": 5.78, "learning_rate": 4.463237021376557e-05, "loss": 0.2762, "step": 500 }, { "epoch": 5.89, "learning_rate": 4.4514916607939866e-05, "loss": 0.308, "step": 510 }, { "epoch": 6.0, "eval_accuracy": 0.9424860853432282, "eval_loss": 0.2758006155490875, "eval_runtime": 7.9236, "eval_samples_per_second": 68.024, "eval_steps_per_second": 8.582, "step": 519 }, { "epoch": 6.01, "learning_rate": 4.439746300211416e-05, "loss": 0.3129, "step": 520 }, { "epoch": 6.12, "learning_rate": 4.428000939628847e-05, "loss": 0.2942, "step": 530 }, { "epoch": 6.24, "learning_rate": 4.416255579046277e-05, "loss": 0.3346, "step": 540 }, { "epoch": 6.35, "learning_rate": 4.404510218463707e-05, "loss": 0.3333, "step": 550 }, { "epoch": 6.47, "learning_rate": 4.392764857881137e-05, "loss": 0.2897, "step": 560 }, { "epoch": 6.58, "learning_rate": 4.3810194972985676e-05, "loss": 0.3428, "step": 570 }, { "epoch": 6.7, "learning_rate": 4.369274136715997e-05, "loss": 0.2877, "step": 580 }, { "epoch": 6.82, "learning_rate": 4.3575287761334274e-05, "loss": 0.2512, "step": 590 }, { "epoch": 6.93, "learning_rate": 4.345783415550858e-05, "loss": 0.3209, "step": 600 }, { "epoch": 7.0, "eval_accuracy": 0.9369202226345084, "eval_loss": 0.3777436316013336, "eval_runtime": 7.9087, "eval_samples_per_second": 68.153, "eval_steps_per_second": 8.598, "step": 606 }, { "epoch": 7.05, "learning_rate": 4.334038054968288e-05, "loss": 0.2756, "step": 610 }, { "epoch": 7.16, "learning_rate": 4.3222926943857175e-05, "loss": 0.2645, "step": 620 }, { "epoch": 7.28, "learning_rate": 4.3105473338031485e-05, "loss": 0.2753, "step": 630 }, { "epoch": 7.39, "learning_rate": 4.298801973220578e-05, "loss": 0.3309, "step": 640 }, { "epoch": 7.51, "learning_rate": 4.2870566126380077e-05, "loss": 0.2937, "step": 650 }, { "epoch": 7.62, "learning_rate": 4.2753112520554386e-05, "loss": 0.246, "step": 660 }, { "epoch": 7.74, "learning_rate": 4.263565891472868e-05, "loss": 0.3071, "step": 670 }, { "epoch": 7.85, "learning_rate": 4.2518205308902985e-05, "loss": 0.2831, "step": 680 }, { "epoch": 7.97, "learning_rate": 4.240075170307729e-05, "loss": 0.284, "step": 690 }, { "epoch": 7.99, "eval_accuracy": 0.9554730983302412, "eval_loss": 0.1704244613647461, "eval_runtime": 7.9276, "eval_samples_per_second": 67.99, "eval_steps_per_second": 8.578, "step": 692 }, { "epoch": 8.09, "learning_rate": 4.228329809725159e-05, "loss": 0.3047, "step": 700 }, { "epoch": 8.2, "learning_rate": 4.2165844491425886e-05, "loss": 0.2757, "step": 710 }, { "epoch": 8.32, "learning_rate": 4.2048390885600195e-05, "loss": 0.2666, "step": 720 }, { "epoch": 8.43, "learning_rate": 4.193093727977449e-05, "loss": 0.2711, "step": 730 }, { "epoch": 8.55, "learning_rate": 4.1813483673948794e-05, "loss": 0.2843, "step": 740 }, { "epoch": 8.66, "learning_rate": 4.1696030068123096e-05, "loss": 0.2664, "step": 750 }, { "epoch": 8.78, "learning_rate": 4.15785764622974e-05, "loss": 0.2963, "step": 760 }, { "epoch": 8.89, "learning_rate": 4.1461122856471695e-05, "loss": 0.2466, "step": 770 }, { "epoch": 9.0, "eval_accuracy": 0.9461966604823747, "eval_loss": 0.15713872015476227, "eval_runtime": 7.9343, "eval_samples_per_second": 67.933, "eval_steps_per_second": 8.57, "step": 779 }, { "epoch": 9.01, "learning_rate": 4.1343669250646e-05, "loss": 0.2721, "step": 780 }, { "epoch": 9.13, "learning_rate": 4.12262156448203e-05, "loss": 0.2391, "step": 790 }, { "epoch": 9.24, "learning_rate": 4.1108762038994596e-05, "loss": 0.2502, "step": 800 }, { "epoch": 9.36, "learning_rate": 4.09913084331689e-05, "loss": 0.2653, "step": 810 }, { "epoch": 9.47, "learning_rate": 4.08738548273432e-05, "loss": 0.2557, "step": 820 }, { "epoch": 9.59, "learning_rate": 4.0756401221517504e-05, "loss": 0.2148, "step": 830 }, { "epoch": 9.7, "learning_rate": 4.06389476156918e-05, "loss": 0.2495, "step": 840 }, { "epoch": 9.82, "learning_rate": 4.052149400986611e-05, "loss": 0.2678, "step": 850 }, { "epoch": 9.93, "learning_rate": 4.0404040404040405e-05, "loss": 0.3123, "step": 860 }, { "epoch": 9.99, "eval_accuracy": 0.9406307977736549, "eval_loss": 0.6491873860359192, "eval_runtime": 8.122, "eval_samples_per_second": 66.363, "eval_steps_per_second": 8.372, "step": 865 }, { "epoch": 10.05, "learning_rate": 4.028658679821471e-05, "loss": 0.256, "step": 870 }, { "epoch": 10.17, "learning_rate": 4.016913319238901e-05, "loss": 0.2439, "step": 880 }, { "epoch": 10.28, "learning_rate": 4.005167958656331e-05, "loss": 0.2592, "step": 890 }, { "epoch": 10.4, "learning_rate": 3.993422598073761e-05, "loss": 0.2539, "step": 900 }, { "epoch": 10.51, "learning_rate": 3.981677237491191e-05, "loss": 0.2771, "step": 910 }, { "epoch": 10.63, "learning_rate": 3.9699318769086215e-05, "loss": 0.3149, "step": 920 }, { "epoch": 10.74, "learning_rate": 3.958186516326051e-05, "loss": 0.2281, "step": 930 }, { "epoch": 10.86, "learning_rate": 3.946441155743481e-05, "loss": 0.2264, "step": 940 }, { "epoch": 10.97, "learning_rate": 3.9346957951609116e-05, "loss": 0.2827, "step": 950 }, { "epoch": 11.0, "eval_accuracy": 0.9406307977736549, "eval_loss": 0.49678388237953186, "eval_runtime": 8.0733, "eval_samples_per_second": 66.763, "eval_steps_per_second": 8.423, "step": 952 }, { "epoch": 11.09, "learning_rate": 3.922950434578342e-05, "loss": 0.2954, "step": 960 }, { "epoch": 11.2, "learning_rate": 3.9112050739957714e-05, "loss": 0.2484, "step": 970 }, { "epoch": 11.32, "learning_rate": 3.8994597134132024e-05, "loss": 0.2382, "step": 980 }, { "epoch": 11.44, "learning_rate": 3.887714352830632e-05, "loss": 0.2572, "step": 990 }, { "epoch": 11.55, "learning_rate": 3.875968992248062e-05, "loss": 0.243, "step": 1000 }, { "epoch": 11.67, "learning_rate": 3.8642236316654925e-05, "loss": 0.2569, "step": 1010 }, { "epoch": 11.78, "learning_rate": 3.852478271082923e-05, "loss": 0.2302, "step": 1020 }, { "epoch": 11.9, "learning_rate": 3.8407329105003524e-05, "loss": 0.2736, "step": 1030 }, { "epoch": 11.99, "eval_accuracy": 0.9591836734693877, "eval_loss": 0.13702794909477234, "eval_runtime": 8.0983, "eval_samples_per_second": 66.557, "eval_steps_per_second": 8.397, "step": 1038 }, { "epoch": 12.01, "learning_rate": 3.8289875499177826e-05, "loss": 0.2202, "step": 1040 }, { "epoch": 12.13, "learning_rate": 3.817242189335213e-05, "loss": 0.2247, "step": 1050 }, { "epoch": 12.24, "learning_rate": 3.8054968287526425e-05, "loss": 0.2449, "step": 1060 }, { "epoch": 12.36, "learning_rate": 3.7937514681700734e-05, "loss": 0.249, "step": 1070 }, { "epoch": 12.48, "learning_rate": 3.782006107587503e-05, "loss": 0.2809, "step": 1080 }, { "epoch": 12.59, "learning_rate": 3.770260747004933e-05, "loss": 0.2439, "step": 1090 }, { "epoch": 12.71, "learning_rate": 3.7585153864223635e-05, "loss": 0.2326, "step": 1100 }, { "epoch": 12.82, "learning_rate": 3.746770025839794e-05, "loss": 0.2725, "step": 1110 }, { "epoch": 12.94, "learning_rate": 3.7350246652572234e-05, "loss": 0.2476, "step": 1120 }, { "epoch": 13.0, "eval_accuracy": 0.9499072356215214, "eval_loss": 0.16156192123889923, "eval_runtime": 8.2723, "eval_samples_per_second": 65.157, "eval_steps_per_second": 8.22, "step": 1125 }, { "epoch": 13.05, "learning_rate": 3.723279304674654e-05, "loss": 0.2495, "step": 1130 }, { "epoch": 13.17, "learning_rate": 3.711533944092084e-05, "loss": 0.2815, "step": 1140 }, { "epoch": 13.28, "learning_rate": 3.699788583509514e-05, "loss": 0.27, "step": 1150 }, { "epoch": 13.4, "learning_rate": 3.688043222926944e-05, "loss": 0.2174, "step": 1160 }, { "epoch": 13.51, "learning_rate": 3.676297862344374e-05, "loss": 0.2503, "step": 1170 }, { "epoch": 13.63, "learning_rate": 3.664552501761804e-05, "loss": 0.2209, "step": 1180 }, { "epoch": 13.75, "learning_rate": 3.652807141179234e-05, "loss": 0.1989, "step": 1190 }, { "epoch": 13.86, "learning_rate": 3.641061780596665e-05, "loss": 0.2473, "step": 1200 }, { "epoch": 13.98, "learning_rate": 3.6293164200140944e-05, "loss": 0.195, "step": 1210 }, { "epoch": 14.0, "eval_accuracy": 0.961038961038961, "eval_loss": 0.1361682415008545, "eval_runtime": 8.4028, "eval_samples_per_second": 64.145, "eval_steps_per_second": 8.093, "step": 1212 }, { "epoch": 14.09, "learning_rate": 3.617571059431525e-05, "loss": 0.2768, "step": 1220 }, { "epoch": 14.21, "learning_rate": 3.605825698848955e-05, "loss": 0.2251, "step": 1230 }, { "epoch": 14.32, "learning_rate": 3.594080338266385e-05, "loss": 0.1665, "step": 1240 }, { "epoch": 14.44, "learning_rate": 3.582334977683815e-05, "loss": 0.2384, "step": 1250 }, { "epoch": 14.55, "learning_rate": 3.570589617101245e-05, "loss": 0.2133, "step": 1260 }, { "epoch": 14.67, "learning_rate": 3.5588442565186754e-05, "loss": 0.2234, "step": 1270 }, { "epoch": 14.79, "learning_rate": 3.5470988959361056e-05, "loss": 0.2373, "step": 1280 }, { "epoch": 14.9, "learning_rate": 3.535353535353535e-05, "loss": 0.2536, "step": 1290 }, { "epoch": 14.99, "eval_accuracy": 0.9536178107606679, "eval_loss": 0.12982788681983948, "eval_runtime": 8.2944, "eval_samples_per_second": 64.984, "eval_steps_per_second": 8.198, "step": 1298 }, { "epoch": 15.02, "learning_rate": 3.5236081747709655e-05, "loss": 0.2483, "step": 1300 }, { "epoch": 15.13, "learning_rate": 3.511862814188396e-05, "loss": 0.2263, "step": 1310 }, { "epoch": 15.25, "learning_rate": 3.500117453605825e-05, "loss": 0.2542, "step": 1320 }, { "epoch": 15.36, "learning_rate": 3.488372093023256e-05, "loss": 0.2009, "step": 1330 }, { "epoch": 15.48, "learning_rate": 3.476626732440686e-05, "loss": 0.2383, "step": 1340 }, { "epoch": 15.59, "learning_rate": 3.464881371858116e-05, "loss": 0.2116, "step": 1350 }, { "epoch": 15.71, "learning_rate": 3.4531360112755464e-05, "loss": 0.2653, "step": 1360 }, { "epoch": 15.83, "learning_rate": 3.441390650692977e-05, "loss": 0.2447, "step": 1370 }, { "epoch": 15.94, "learning_rate": 3.429645290110406e-05, "loss": 0.2022, "step": 1380 }, { "epoch": 16.0, "eval_accuracy": 0.9517625231910947, "eval_loss": 0.7470229268074036, "eval_runtime": 8.0594, "eval_samples_per_second": 66.878, "eval_steps_per_second": 8.437, "step": 1385 }, { "epoch": 16.06, "learning_rate": 3.417899929527837e-05, "loss": 0.2249, "step": 1390 }, { "epoch": 16.17, "learning_rate": 3.406154568945267e-05, "loss": 0.2498, "step": 1400 }, { "epoch": 16.29, "learning_rate": 3.394409208362697e-05, "loss": 0.215, "step": 1410 }, { "epoch": 16.4, "learning_rate": 3.382663847780127e-05, "loss": 0.2512, "step": 1420 }, { "epoch": 16.52, "learning_rate": 3.370918487197557e-05, "loss": 0.2096, "step": 1430 }, { "epoch": 16.63, "learning_rate": 3.359173126614987e-05, "loss": 0.2097, "step": 1440 }, { "epoch": 16.75, "learning_rate": 3.3474277660324174e-05, "loss": 0.2158, "step": 1450 }, { "epoch": 16.86, "learning_rate": 3.335682405449848e-05, "loss": 0.2303, "step": 1460 }, { "epoch": 16.98, "learning_rate": 3.323937044867277e-05, "loss": 0.2406, "step": 1470 }, { "epoch": 16.99, "eval_accuracy": 0.9647495361781077, "eval_loss": 0.12411854416131973, "eval_runtime": 7.9838, "eval_samples_per_second": 67.512, "eval_steps_per_second": 8.517, "step": 1471 }, { "epoch": 17.1, "learning_rate": 3.3121916842847076e-05, "loss": 0.2697, "step": 1480 }, { "epoch": 17.21, "learning_rate": 3.300446323702138e-05, "loss": 0.2567, "step": 1490 }, { "epoch": 17.33, "learning_rate": 3.288700963119568e-05, "loss": 0.2745, "step": 1500 }, { "epoch": 17.44, "learning_rate": 3.276955602536998e-05, "loss": 0.2269, "step": 1510 }, { "epoch": 17.56, "learning_rate": 3.2652102419544286e-05, "loss": 0.1893, "step": 1520 }, { "epoch": 17.67, "learning_rate": 3.253464881371858e-05, "loss": 0.1827, "step": 1530 }, { "epoch": 17.79, "learning_rate": 3.2417195207892885e-05, "loss": 0.2579, "step": 1540 }, { "epoch": 17.9, "learning_rate": 3.229974160206719e-05, "loss": 0.2019, "step": 1550 }, { "epoch": 18.0, "eval_accuracy": 0.9536178107606679, "eval_loss": 0.12778125703334808, "eval_runtime": 8.0655, "eval_samples_per_second": 66.828, "eval_steps_per_second": 8.431, "step": 1558 }, { "epoch": 18.02, "learning_rate": 3.2182287996241483e-05, "loss": 0.184, "step": 1560 }, { "epoch": 18.14, "learning_rate": 3.2064834390415786e-05, "loss": 0.2261, "step": 1570 }, { "epoch": 18.25, "learning_rate": 3.194738078459009e-05, "loss": 0.2155, "step": 1580 }, { "epoch": 18.37, "learning_rate": 3.182992717876439e-05, "loss": 0.1857, "step": 1590 }, { "epoch": 18.48, "learning_rate": 3.171247357293869e-05, "loss": 0.2009, "step": 1600 }, { "epoch": 18.6, "learning_rate": 3.159501996711299e-05, "loss": 0.213, "step": 1610 }, { "epoch": 18.71, "learning_rate": 3.147756636128729e-05, "loss": 0.2299, "step": 1620 }, { "epoch": 18.83, "learning_rate": 3.1360112755461595e-05, "loss": 0.1956, "step": 1630 }, { "epoch": 18.94, "learning_rate": 3.124265914963589e-05, "loss": 0.2073, "step": 1640 }, { "epoch": 18.99, "eval_accuracy": 0.9684601113172542, "eval_loss": 0.11341895163059235, "eval_runtime": 8.1079, "eval_samples_per_second": 66.478, "eval_steps_per_second": 8.387, "step": 1644 }, { "epoch": 19.06, "learning_rate": 3.11252055438102e-05, "loss": 0.1869, "step": 1650 }, { "epoch": 19.17, "learning_rate": 3.1007751937984497e-05, "loss": 0.1941, "step": 1660 }, { "epoch": 19.29, "learning_rate": 3.08902983321588e-05, "loss": 0.2218, "step": 1670 }, { "epoch": 19.41, "learning_rate": 3.07728447263331e-05, "loss": 0.196, "step": 1680 }, { "epoch": 19.52, "learning_rate": 3.06553911205074e-05, "loss": 0.2339, "step": 1690 }, { "epoch": 19.64, "learning_rate": 3.05379375146817e-05, "loss": 0.2045, "step": 1700 }, { "epoch": 19.75, "learning_rate": 3.0420483908856e-05, "loss": 0.2057, "step": 1710 }, { "epoch": 19.87, "learning_rate": 3.0303030303030306e-05, "loss": 0.2136, "step": 1720 }, { "epoch": 19.98, "learning_rate": 3.0185576697204605e-05, "loss": 0.1873, "step": 1730 }, { "epoch": 20.0, "eval_accuracy": 0.9628942486085343, "eval_loss": 0.6738272309303284, "eval_runtime": 8.4568, "eval_samples_per_second": 63.736, "eval_steps_per_second": 8.041, "step": 1731 }, { "epoch": 20.1, "learning_rate": 3.0068123091378908e-05, "loss": 0.2643, "step": 1740 }, { "epoch": 20.21, "learning_rate": 2.9950669485553207e-05, "loss": 0.1854, "step": 1750 }, { "epoch": 20.33, "learning_rate": 2.983321587972751e-05, "loss": 0.2382, "step": 1760 }, { "epoch": 20.45, "learning_rate": 2.971576227390181e-05, "loss": 0.18, "step": 1770 }, { "epoch": 20.56, "learning_rate": 2.9598308668076115e-05, "loss": 0.1763, "step": 1780 }, { "epoch": 20.68, "learning_rate": 2.948085506225041e-05, "loss": 0.2399, "step": 1790 }, { "epoch": 20.79, "learning_rate": 2.9363401456424717e-05, "loss": 0.2275, "step": 1800 }, { "epoch": 20.91, "learning_rate": 2.9245947850599016e-05, "loss": 0.2446, "step": 1810 }, { "epoch": 21.0, "eval_accuracy": 0.9684601113172542, "eval_loss": 0.1033068299293518, "eval_runtime": 8.3041, "eval_samples_per_second": 64.908, "eval_steps_per_second": 8.189, "step": 1818 }, { "epoch": 21.02, "learning_rate": 2.9128494244773312e-05, "loss": 0.2423, "step": 1820 }, { "epoch": 21.14, "learning_rate": 2.9011040638947618e-05, "loss": 0.2239, "step": 1830 }, { "epoch": 21.25, "learning_rate": 2.8893587033121917e-05, "loss": 0.2212, "step": 1840 }, { "epoch": 21.37, "learning_rate": 2.877613342729622e-05, "loss": 0.1594, "step": 1850 }, { "epoch": 21.49, "learning_rate": 2.865867982147052e-05, "loss": 0.1878, "step": 1860 }, { "epoch": 21.6, "learning_rate": 2.8541226215644822e-05, "loss": 0.1965, "step": 1870 }, { "epoch": 21.72, "learning_rate": 2.842377260981912e-05, "loss": 0.2186, "step": 1880 }, { "epoch": 21.83, "learning_rate": 2.8306319003993427e-05, "loss": 0.1904, "step": 1890 }, { "epoch": 21.95, "learning_rate": 2.8188865398167723e-05, "loss": 0.1999, "step": 1900 }, { "epoch": 21.99, "eval_accuracy": 0.9647495361781077, "eval_loss": 0.11812406778335571, "eval_runtime": 8.2822, "eval_samples_per_second": 65.079, "eval_steps_per_second": 8.21, "step": 1904 }, { "epoch": 22.06, "learning_rate": 2.807141179234203e-05, "loss": 0.2147, "step": 1910 }, { "epoch": 22.18, "learning_rate": 2.795395818651633e-05, "loss": 0.2158, "step": 1920 }, { "epoch": 22.29, "learning_rate": 2.783650458069063e-05, "loss": 0.1937, "step": 1930 }, { "epoch": 22.41, "learning_rate": 2.771905097486493e-05, "loss": 0.2208, "step": 1940 }, { "epoch": 22.52, "learning_rate": 2.760159736903923e-05, "loss": 0.155, "step": 1950 }, { "epoch": 22.64, "learning_rate": 2.7484143763213532e-05, "loss": 0.1793, "step": 1960 }, { "epoch": 22.76, "learning_rate": 2.736669015738783e-05, "loss": 0.1794, "step": 1970 }, { "epoch": 22.87, "learning_rate": 2.7249236551562134e-05, "loss": 0.22, "step": 1980 }, { "epoch": 22.99, "learning_rate": 2.7131782945736434e-05, "loss": 0.1716, "step": 1990 }, { "epoch": 23.0, "eval_accuracy": 0.961038961038961, "eval_loss": 0.10991629213094711, "eval_runtime": 8.221, "eval_samples_per_second": 65.564, "eval_steps_per_second": 8.272, "step": 1991 }, { "epoch": 23.1, "learning_rate": 2.7014329339910736e-05, "loss": 0.1692, "step": 2000 }, { "epoch": 23.22, "learning_rate": 2.6896875734085036e-05, "loss": 0.1931, "step": 2010 }, { "epoch": 23.33, "learning_rate": 2.677942212825934e-05, "loss": 0.1663, "step": 2020 }, { "epoch": 23.45, "learning_rate": 2.6661968522433637e-05, "loss": 0.2102, "step": 2030 }, { "epoch": 23.56, "learning_rate": 2.6544514916607944e-05, "loss": 0.1822, "step": 2040 }, { "epoch": 23.68, "learning_rate": 2.6427061310782243e-05, "loss": 0.2216, "step": 2050 }, { "epoch": 23.8, "learning_rate": 2.6309607704956545e-05, "loss": 0.1731, "step": 2060 }, { "epoch": 23.91, "learning_rate": 2.6192154099130845e-05, "loss": 0.175, "step": 2070 }, { "epoch": 23.99, "eval_accuracy": 0.974025974025974, "eval_loss": 0.10644800215959549, "eval_runtime": 8.1046, "eval_samples_per_second": 66.505, "eval_steps_per_second": 8.39, "step": 2077 }, { "epoch": 24.03, "learning_rate": 2.6074700493305144e-05, "loss": 0.1845, "step": 2080 }, { "epoch": 24.14, "learning_rate": 2.5957246887479447e-05, "loss": 0.2091, "step": 2090 }, { "epoch": 24.26, "learning_rate": 2.5839793281653746e-05, "loss": 0.1783, "step": 2100 }, { "epoch": 24.37, "learning_rate": 2.572233967582805e-05, "loss": 0.1822, "step": 2110 }, { "epoch": 24.49, "learning_rate": 2.5604886070002348e-05, "loss": 0.2239, "step": 2120 }, { "epoch": 24.6, "learning_rate": 2.5487432464176654e-05, "loss": 0.1639, "step": 2130 }, { "epoch": 24.72, "learning_rate": 2.536997885835095e-05, "loss": 0.1839, "step": 2140 }, { "epoch": 24.83, "learning_rate": 2.5252525252525256e-05, "loss": 0.2114, "step": 2150 }, { "epoch": 24.95, "learning_rate": 2.5135071646699555e-05, "loss": 0.1962, "step": 2160 }, { "epoch": 25.0, "eval_accuracy": 0.9721706864564007, "eval_loss": 0.11735469102859497, "eval_runtime": 8.2081, "eval_samples_per_second": 65.667, "eval_steps_per_second": 8.284, "step": 2164 }, { "epoch": 25.07, "learning_rate": 2.5017618040873858e-05, "loss": 0.1658, "step": 2170 }, { "epoch": 25.18, "learning_rate": 2.4900164435048157e-05, "loss": 0.1979, "step": 2180 }, { "epoch": 25.3, "learning_rate": 2.4782710829222456e-05, "loss": 0.1707, "step": 2190 }, { "epoch": 25.41, "learning_rate": 2.466525722339676e-05, "loss": 0.1932, "step": 2200 }, { "epoch": 25.53, "learning_rate": 2.4547803617571062e-05, "loss": 0.2355, "step": 2210 }, { "epoch": 25.64, "learning_rate": 2.443035001174536e-05, "loss": 0.2246, "step": 2220 }, { "epoch": 25.76, "learning_rate": 2.4312896405919664e-05, "loss": 0.2031, "step": 2230 }, { "epoch": 25.87, "learning_rate": 2.4195442800093966e-05, "loss": 0.2062, "step": 2240 }, { "epoch": 25.99, "learning_rate": 2.4077989194268266e-05, "loss": 0.1943, "step": 2250 }, { "epoch": 25.99, "eval_accuracy": 0.9517625231910947, "eval_loss": 1.0624566078186035, "eval_runtime": 8.1518, "eval_samples_per_second": 66.12, "eval_steps_per_second": 8.342, "step": 2250 }, { "epoch": 26.11, "learning_rate": 2.3960535588442568e-05, "loss": 0.2071, "step": 2260 }, { "epoch": 26.22, "learning_rate": 2.3843081982616868e-05, "loss": 0.1872, "step": 2270 }, { "epoch": 26.34, "learning_rate": 2.3725628376791167e-05, "loss": 0.1835, "step": 2280 }, { "epoch": 26.45, "learning_rate": 2.360817477096547e-05, "loss": 0.171, "step": 2290 }, { "epoch": 26.57, "learning_rate": 2.349072116513977e-05, "loss": 0.2028, "step": 2300 }, { "epoch": 26.68, "learning_rate": 2.337326755931407e-05, "loss": 0.2108, "step": 2310 }, { "epoch": 26.8, "learning_rate": 2.3255813953488374e-05, "loss": 0.2046, "step": 2320 }, { "epoch": 26.91, "learning_rate": 2.3138360347662673e-05, "loss": 0.2044, "step": 2330 }, { "epoch": 27.0, "eval_accuracy": 0.9573283858998145, "eval_loss": 0.8419390916824341, "eval_runtime": 7.9375, "eval_samples_per_second": 67.906, "eval_steps_per_second": 8.567, "step": 2337 }, { "epoch": 27.03, "learning_rate": 2.3020906741836976e-05, "loss": 0.2017, "step": 2340 }, { "epoch": 27.15, "learning_rate": 2.2903453136011275e-05, "loss": 0.1923, "step": 2350 }, { "epoch": 27.26, "learning_rate": 2.2785999530185578e-05, "loss": 0.171, "step": 2360 }, { "epoch": 27.38, "learning_rate": 2.266854592435988e-05, "loss": 0.1912, "step": 2370 }, { "epoch": 27.49, "learning_rate": 2.255109231853418e-05, "loss": 0.2001, "step": 2380 }, { "epoch": 27.61, "learning_rate": 2.2433638712708483e-05, "loss": 0.1668, "step": 2390 }, { "epoch": 27.72, "learning_rate": 2.2316185106882785e-05, "loss": 0.1699, "step": 2400 }, { "epoch": 27.84, "learning_rate": 2.219873150105708e-05, "loss": 0.1944, "step": 2410 }, { "epoch": 27.95, "learning_rate": 2.2081277895231384e-05, "loss": 0.1835, "step": 2420 }, { "epoch": 28.0, "eval_accuracy": 0.9703153988868275, "eval_loss": 0.11119159311056137, "eval_runtime": 7.8965, "eval_samples_per_second": 68.258, "eval_steps_per_second": 8.611, "step": 2424 }, { "epoch": 28.07, "learning_rate": 2.1963824289405686e-05, "loss": 0.1724, "step": 2430 }, { "epoch": 28.18, "learning_rate": 2.1846370683579986e-05, "loss": 0.2228, "step": 2440 }, { "epoch": 28.3, "learning_rate": 2.172891707775429e-05, "loss": 0.1672, "step": 2450 }, { "epoch": 28.42, "learning_rate": 2.1611463471928588e-05, "loss": 0.2021, "step": 2460 }, { "epoch": 28.53, "learning_rate": 2.149400986610289e-05, "loss": 0.1893, "step": 2470 }, { "epoch": 28.65, "learning_rate": 2.1376556260277193e-05, "loss": 0.2024, "step": 2480 }, { "epoch": 28.76, "learning_rate": 2.1259102654451492e-05, "loss": 0.1508, "step": 2490 }, { "epoch": 28.88, "learning_rate": 2.1141649048625795e-05, "loss": 0.1562, "step": 2500 }, { "epoch": 28.99, "learning_rate": 2.1024195442800098e-05, "loss": 0.191, "step": 2510 }, { "epoch": 28.99, "eval_accuracy": 0.9684601113172542, "eval_loss": 0.11420014500617981, "eval_runtime": 7.9348, "eval_samples_per_second": 67.929, "eval_steps_per_second": 8.57, "step": 2510 }, { "epoch": 29.11, "learning_rate": 2.0906741836974397e-05, "loss": 0.1925, "step": 2520 }, { "epoch": 29.22, "learning_rate": 2.07892882311487e-05, "loss": 0.1513, "step": 2530 }, { "epoch": 29.34, "learning_rate": 2.0671834625323e-05, "loss": 0.2406, "step": 2540 }, { "epoch": 29.46, "learning_rate": 2.0554381019497298e-05, "loss": 0.1809, "step": 2550 }, { "epoch": 29.57, "learning_rate": 2.04369274136716e-05, "loss": 0.1641, "step": 2560 }, { "epoch": 29.69, "learning_rate": 2.03194738078459e-05, "loss": 0.1805, "step": 2570 }, { "epoch": 29.8, "learning_rate": 2.0202020202020203e-05, "loss": 0.1702, "step": 2580 }, { "epoch": 29.92, "learning_rate": 2.0084566596194505e-05, "loss": 0.1676, "step": 2590 }, { "epoch": 30.0, "eval_accuracy": 0.9647495361781077, "eval_loss": 0.10803297162055969, "eval_runtime": 7.8199, "eval_samples_per_second": 68.927, "eval_steps_per_second": 8.696, "step": 2597 }, { "epoch": 30.03, "learning_rate": 1.9967112990368805e-05, "loss": 0.1554, "step": 2600 }, { "epoch": 30.15, "learning_rate": 1.9849659384543107e-05, "loss": 0.2092, "step": 2610 }, { "epoch": 30.26, "learning_rate": 1.9732205778717407e-05, "loss": 0.16, "step": 2620 }, { "epoch": 30.38, "learning_rate": 1.961475217289171e-05, "loss": 0.1932, "step": 2630 }, { "epoch": 30.5, "learning_rate": 1.9497298567066012e-05, "loss": 0.1742, "step": 2640 }, { "epoch": 30.61, "learning_rate": 1.937984496124031e-05, "loss": 0.1714, "step": 2650 }, { "epoch": 30.73, "learning_rate": 1.9262391355414614e-05, "loss": 0.1668, "step": 2660 }, { "epoch": 30.84, "learning_rate": 1.9144937749588913e-05, "loss": 0.2284, "step": 2670 }, { "epoch": 30.96, "learning_rate": 1.9027484143763212e-05, "loss": 0.1533, "step": 2680 }, { "epoch": 30.99, "eval_accuracy": 0.9647495361781077, "eval_loss": 0.14941494166851044, "eval_runtime": 7.9882, "eval_samples_per_second": 67.474, "eval_steps_per_second": 8.513, "step": 2683 }, { "epoch": 31.07, "learning_rate": 1.8910030537937515e-05, "loss": 0.179, "step": 2690 }, { "epoch": 31.19, "learning_rate": 1.8792576932111818e-05, "loss": 0.1589, "step": 2700 }, { "epoch": 31.3, "learning_rate": 1.8675123326286117e-05, "loss": 0.2133, "step": 2710 }, { "epoch": 31.42, "learning_rate": 1.855766972046042e-05, "loss": 0.173, "step": 2720 }, { "epoch": 31.53, "learning_rate": 1.844021611463472e-05, "loss": 0.1998, "step": 2730 }, { "epoch": 31.65, "learning_rate": 1.832276250880902e-05, "loss": 0.2054, "step": 2740 }, { "epoch": 31.77, "learning_rate": 1.8205308902983324e-05, "loss": 0.1739, "step": 2750 }, { "epoch": 31.88, "learning_rate": 1.8087855297157624e-05, "loss": 0.1581, "step": 2760 }, { "epoch": 32.0, "learning_rate": 1.7970401691331926e-05, "loss": 0.1991, "step": 2770 }, { "epoch": 32.0, "eval_accuracy": 0.9703153988868275, "eval_loss": 0.10002347081899643, "eval_runtime": 8.066, "eval_samples_per_second": 66.823, "eval_steps_per_second": 8.43, "step": 2770 }, { "epoch": 32.11, "learning_rate": 1.7852948085506225e-05, "loss": 0.1573, "step": 2780 }, { "epoch": 32.23, "learning_rate": 1.7735494479680528e-05, "loss": 0.1641, "step": 2790 }, { "epoch": 32.34, "learning_rate": 1.7618040873854827e-05, "loss": 0.1656, "step": 2800 }, { "epoch": 32.46, "learning_rate": 1.7500587268029127e-05, "loss": 0.2127, "step": 2810 }, { "epoch": 32.57, "learning_rate": 1.738313366220343e-05, "loss": 0.1756, "step": 2820 }, { "epoch": 32.69, "learning_rate": 1.7265680056377732e-05, "loss": 0.1754, "step": 2830 }, { "epoch": 32.81, "learning_rate": 1.714822645055203e-05, "loss": 0.1605, "step": 2840 }, { "epoch": 32.92, "learning_rate": 1.7030772844726334e-05, "loss": 0.1845, "step": 2850 }, { "epoch": 32.99, "eval_accuracy": 0.974025974025974, "eval_loss": 0.09888846427202225, "eval_runtime": 7.9989, "eval_samples_per_second": 67.385, "eval_steps_per_second": 8.501, "step": 2856 }, { "epoch": 33.04, "learning_rate": 1.6913319238900637e-05, "loss": 0.1855, "step": 2860 }, { "epoch": 33.15, "learning_rate": 1.6795865633074936e-05, "loss": 0.211, "step": 2870 }, { "epoch": 33.27, "learning_rate": 1.667841202724924e-05, "loss": 0.2067, "step": 2880 }, { "epoch": 33.38, "learning_rate": 1.6560958421423538e-05, "loss": 0.1738, "step": 2890 }, { "epoch": 33.5, "learning_rate": 1.644350481559784e-05, "loss": 0.1725, "step": 2900 }, { "epoch": 33.61, "learning_rate": 1.6326051209772143e-05, "loss": 0.1686, "step": 2910 }, { "epoch": 33.73, "learning_rate": 1.6208597603946442e-05, "loss": 0.1642, "step": 2920 }, { "epoch": 33.84, "learning_rate": 1.6091143998120742e-05, "loss": 0.1527, "step": 2930 }, { "epoch": 33.96, "learning_rate": 1.5973690392295044e-05, "loss": 0.1605, "step": 2940 }, { "epoch": 34.0, "eval_accuracy": 0.9684601113172542, "eval_loss": 0.09749138355255127, "eval_runtime": 8.1236, "eval_samples_per_second": 66.35, "eval_steps_per_second": 8.371, "step": 2943 }, { "epoch": 34.08, "learning_rate": 1.5856236786469344e-05, "loss": 0.1699, "step": 2950 }, { "epoch": 34.19, "learning_rate": 1.5738783180643646e-05, "loss": 0.1646, "step": 2960 }, { "epoch": 34.31, "learning_rate": 1.5621329574817946e-05, "loss": 0.1779, "step": 2970 }, { "epoch": 34.42, "learning_rate": 1.5503875968992248e-05, "loss": 0.1908, "step": 2980 }, { "epoch": 34.54, "learning_rate": 1.538642236316655e-05, "loss": 0.182, "step": 2990 }, { "epoch": 34.65, "learning_rate": 1.526896875734085e-05, "loss": 0.2004, "step": 3000 }, { "epoch": 34.77, "learning_rate": 1.5151515151515153e-05, "loss": 0.1426, "step": 3010 }, { "epoch": 34.88, "learning_rate": 1.5034061545689454e-05, "loss": 0.1614, "step": 3020 }, { "epoch": 35.0, "learning_rate": 1.4916607939863755e-05, "loss": 0.1928, "step": 3030 }, { "epoch": 35.0, "eval_accuracy": 0.9628942486085343, "eval_loss": 0.4555383026599884, "eval_runtime": 7.8835, "eval_samples_per_second": 68.371, "eval_steps_per_second": 8.626, "step": 3030 }, { "epoch": 35.12, "learning_rate": 1.4799154334038057e-05, "loss": 0.1884, "step": 3040 }, { "epoch": 35.23, "learning_rate": 1.4681700728212358e-05, "loss": 0.1651, "step": 3050 }, { "epoch": 35.35, "learning_rate": 1.4564247122386656e-05, "loss": 0.1602, "step": 3060 }, { "epoch": 35.46, "learning_rate": 1.4446793516560959e-05, "loss": 0.1688, "step": 3070 }, { "epoch": 35.58, "learning_rate": 1.432933991073526e-05, "loss": 0.1719, "step": 3080 }, { "epoch": 35.69, "learning_rate": 1.421188630490956e-05, "loss": 0.1608, "step": 3090 }, { "epoch": 35.81, "learning_rate": 1.4094432699083862e-05, "loss": 0.163, "step": 3100 }, { "epoch": 35.92, "learning_rate": 1.3976979093258164e-05, "loss": 0.1506, "step": 3110 }, { "epoch": 35.99, "eval_accuracy": 0.9703153988868275, "eval_loss": 0.1059107631444931, "eval_runtime": 8.0108, "eval_samples_per_second": 67.284, "eval_steps_per_second": 8.489, "step": 3116 }, { "epoch": 36.04, "learning_rate": 1.3859525487432465e-05, "loss": 0.1802, "step": 3120 }, { "epoch": 36.16, "learning_rate": 1.3742071881606766e-05, "loss": 0.1332, "step": 3130 }, { "epoch": 36.27, "learning_rate": 1.3624618275781067e-05, "loss": 0.1298, "step": 3140 }, { "epoch": 36.39, "learning_rate": 1.3507164669955368e-05, "loss": 0.1705, "step": 3150 }, { "epoch": 36.5, "learning_rate": 1.338971106412967e-05, "loss": 0.1431, "step": 3160 }, { "epoch": 36.62, "learning_rate": 1.3272257458303972e-05, "loss": 0.1582, "step": 3170 }, { "epoch": 36.73, "learning_rate": 1.3154803852478273e-05, "loss": 0.1544, "step": 3180 }, { "epoch": 36.85, "learning_rate": 1.3037350246652572e-05, "loss": 0.1966, "step": 3190 }, { "epoch": 36.96, "learning_rate": 1.2919896640826873e-05, "loss": 0.1912, "step": 3200 }, { "epoch": 37.0, "eval_accuracy": 0.9647495361781077, "eval_loss": 0.10163893550634384, "eval_runtime": 8.0326, "eval_samples_per_second": 67.102, "eval_steps_per_second": 8.466, "step": 3203 }, { "epoch": 37.08, "learning_rate": 1.2802443035001174e-05, "loss": 0.1956, "step": 3210 }, { "epoch": 37.19, "learning_rate": 1.2684989429175475e-05, "loss": 0.1705, "step": 3220 }, { "epoch": 37.31, "learning_rate": 1.2567535823349778e-05, "loss": 0.1559, "step": 3230 }, { "epoch": 37.43, "learning_rate": 1.2450082217524079e-05, "loss": 0.1684, "step": 3240 }, { "epoch": 37.54, "learning_rate": 1.233262861169838e-05, "loss": 0.1962, "step": 3250 }, { "epoch": 37.66, "learning_rate": 1.221517500587268e-05, "loss": 0.1528, "step": 3260 }, { "epoch": 37.77, "learning_rate": 1.2097721400046983e-05, "loss": 0.1788, "step": 3270 }, { "epoch": 37.89, "learning_rate": 1.1980267794221284e-05, "loss": 0.1689, "step": 3280 }, { "epoch": 37.99, "eval_accuracy": 0.9666048237476809, "eval_loss": 0.5420700907707214, "eval_runtime": 8.2102, "eval_samples_per_second": 65.65, "eval_steps_per_second": 8.282, "step": 3289 }, { "epoch": 38.0, "learning_rate": 1.1862814188395583e-05, "loss": 0.1739, "step": 3290 }, { "epoch": 38.12, "learning_rate": 1.1745360582569884e-05, "loss": 0.1396, "step": 3300 }, { "epoch": 38.23, "learning_rate": 1.1627906976744187e-05, "loss": 0.1871, "step": 3310 }, { "epoch": 38.35, "learning_rate": 1.1510453370918488e-05, "loss": 0.1947, "step": 3320 }, { "epoch": 38.47, "learning_rate": 1.1392999765092789e-05, "loss": 0.1823, "step": 3330 }, { "epoch": 38.58, "learning_rate": 1.127554615926709e-05, "loss": 0.1816, "step": 3340 }, { "epoch": 38.7, "learning_rate": 1.1158092553441393e-05, "loss": 0.2031, "step": 3350 }, { "epoch": 38.81, "learning_rate": 1.1040638947615692e-05, "loss": 0.1764, "step": 3360 }, { "epoch": 38.93, "learning_rate": 1.0923185341789993e-05, "loss": 0.1467, "step": 3370 }, { "epoch": 39.0, "eval_accuracy": 0.9647495361781077, "eval_loss": 0.10951773822307587, "eval_runtime": 8.0526, "eval_samples_per_second": 66.935, "eval_steps_per_second": 8.444, "step": 3376 }, { "epoch": 39.04, "learning_rate": 1.0805731735964294e-05, "loss": 0.1615, "step": 3380 }, { "epoch": 39.16, "learning_rate": 1.0688278130138596e-05, "loss": 0.1797, "step": 3390 }, { "epoch": 39.27, "learning_rate": 1.0570824524312897e-05, "loss": 0.1314, "step": 3400 }, { "epoch": 39.39, "learning_rate": 1.0453370918487198e-05, "loss": 0.19, "step": 3410 }, { "epoch": 39.5, "learning_rate": 1.03359173126615e-05, "loss": 0.1955, "step": 3420 }, { "epoch": 39.62, "learning_rate": 1.02184637068358e-05, "loss": 0.1635, "step": 3430 }, { "epoch": 39.74, "learning_rate": 1.0101010101010101e-05, "loss": 0.1544, "step": 3440 }, { "epoch": 39.85, "learning_rate": 9.983556495184402e-06, "loss": 0.1604, "step": 3450 }, { "epoch": 39.97, "learning_rate": 9.866102889358703e-06, "loss": 0.1513, "step": 3460 }, { "epoch": 39.99, "eval_accuracy": 0.9703153988868275, "eval_loss": 0.3827688992023468, "eval_runtime": 7.9622, "eval_samples_per_second": 67.695, "eval_steps_per_second": 8.54, "step": 3462 }, { "epoch": 40.08, "learning_rate": 9.748649283533006e-06, "loss": 0.1578, "step": 3470 }, { "epoch": 40.2, "learning_rate": 9.631195677707307e-06, "loss": 0.1633, "step": 3480 }, { "epoch": 40.31, "learning_rate": 9.513742071881606e-06, "loss": 0.1273, "step": 3490 }, { "epoch": 40.43, "learning_rate": 9.396288466055909e-06, "loss": 0.1535, "step": 3500 }, { "epoch": 40.54, "learning_rate": 9.27883486023021e-06, "loss": 0.2011, "step": 3510 }, { "epoch": 40.66, "learning_rate": 9.16138125440451e-06, "loss": 0.1801, "step": 3520 }, { "epoch": 40.78, "learning_rate": 9.043927648578812e-06, "loss": 0.1341, "step": 3530 }, { "epoch": 40.89, "learning_rate": 8.926474042753113e-06, "loss": 0.1768, "step": 3540 }, { "epoch": 41.0, "eval_accuracy": 0.9703153988868275, "eval_loss": 0.09445924311876297, "eval_runtime": 8.0713, "eval_samples_per_second": 66.78, "eval_steps_per_second": 8.425, "step": 3549 }, { "epoch": 41.01, "learning_rate": 8.809020436927414e-06, "loss": 0.1797, "step": 3550 }, { "epoch": 41.12, "learning_rate": 8.691566831101715e-06, "loss": 0.1782, "step": 3560 }, { "epoch": 41.24, "learning_rate": 8.574113225276016e-06, "loss": 0.164, "step": 3570 }, { "epoch": 41.35, "learning_rate": 8.456659619450318e-06, "loss": 0.156, "step": 3580 }, { "epoch": 41.47, "learning_rate": 8.33920601362462e-06, "loss": 0.1603, "step": 3590 }, { "epoch": 41.58, "learning_rate": 8.22175240779892e-06, "loss": 0.1663, "step": 3600 }, { "epoch": 41.7, "learning_rate": 8.104298801973221e-06, "loss": 0.1433, "step": 3610 }, { "epoch": 41.82, "learning_rate": 7.986845196147522e-06, "loss": 0.1769, "step": 3620 }, { "epoch": 41.93, "learning_rate": 7.869391590321823e-06, "loss": 0.1633, "step": 3630 }, { "epoch": 42.0, "eval_accuracy": 0.9591836734693877, "eval_loss": 0.22497127950191498, "eval_runtime": 8.2319, "eval_samples_per_second": 65.477, "eval_steps_per_second": 8.261, "step": 3636 }, { "epoch": 42.05, "learning_rate": 7.751937984496124e-06, "loss": 0.1682, "step": 3640 }, { "epoch": 42.16, "learning_rate": 7.634484378670425e-06, "loss": 0.1527, "step": 3650 }, { "epoch": 42.28, "learning_rate": 7.517030772844727e-06, "loss": 0.1615, "step": 3660 }, { "epoch": 42.39, "learning_rate": 7.399577167019029e-06, "loss": 0.1867, "step": 3670 }, { "epoch": 42.51, "learning_rate": 7.282123561193328e-06, "loss": 0.1696, "step": 3680 }, { "epoch": 42.62, "learning_rate": 7.16466995536763e-06, "loss": 0.1257, "step": 3690 }, { "epoch": 42.74, "learning_rate": 7.047216349541931e-06, "loss": 0.1549, "step": 3700 }, { "epoch": 42.85, "learning_rate": 6.929762743716233e-06, "loss": 0.1604, "step": 3710 }, { "epoch": 42.97, "learning_rate": 6.812309137890534e-06, "loss": 0.1945, "step": 3720 }, { "epoch": 42.99, "eval_accuracy": 0.9684601113172542, "eval_loss": 0.2014760673046112, "eval_runtime": 8.2434, "eval_samples_per_second": 65.386, "eval_steps_per_second": 8.249, "step": 3722 }, { "epoch": 43.09, "learning_rate": 6.694855532064835e-06, "loss": 0.1757, "step": 3730 }, { "epoch": 43.2, "learning_rate": 6.577401926239136e-06, "loss": 0.1401, "step": 3740 }, { "epoch": 43.32, "learning_rate": 6.4599483204134365e-06, "loss": 0.1655, "step": 3750 }, { "epoch": 43.43, "learning_rate": 6.3424947145877375e-06, "loss": 0.178, "step": 3760 }, { "epoch": 43.55, "learning_rate": 6.225041108762039e-06, "loss": 0.1277, "step": 3770 }, { "epoch": 43.66, "learning_rate": 6.10758750293634e-06, "loss": 0.1861, "step": 3780 }, { "epoch": 43.78, "learning_rate": 5.990133897110642e-06, "loss": 0.1634, "step": 3790 }, { "epoch": 43.89, "learning_rate": 5.872680291284942e-06, "loss": 0.1896, "step": 3800 }, { "epoch": 44.0, "eval_accuracy": 0.9666048237476809, "eval_loss": 0.11137495934963226, "eval_runtime": 8.1155, "eval_samples_per_second": 66.416, "eval_steps_per_second": 8.379, "step": 3809 }, { "epoch": 44.01, "learning_rate": 5.755226685459244e-06, "loss": 0.1602, "step": 3810 }, { "epoch": 44.13, "learning_rate": 5.637773079633545e-06, "loss": 0.1184, "step": 3820 }, { "epoch": 44.24, "learning_rate": 5.520319473807846e-06, "loss": 0.144, "step": 3830 }, { "epoch": 44.36, "learning_rate": 5.402865867982147e-06, "loss": 0.1956, "step": 3840 }, { "epoch": 44.47, "learning_rate": 5.285412262156449e-06, "loss": 0.1654, "step": 3850 }, { "epoch": 44.59, "learning_rate": 5.16795865633075e-06, "loss": 0.1889, "step": 3860 }, { "epoch": 44.7, "learning_rate": 5.050505050505051e-06, "loss": 0.108, "step": 3870 }, { "epoch": 44.82, "learning_rate": 4.933051444679352e-06, "loss": 0.1702, "step": 3880 }, { "epoch": 44.93, "learning_rate": 4.8155978388536535e-06, "loss": 0.1629, "step": 3890 }, { "epoch": 44.99, "eval_accuracy": 0.9666048237476809, "eval_loss": 0.0954408049583435, "eval_runtime": 8.1596, "eval_samples_per_second": 66.057, "eval_steps_per_second": 8.334, "step": 3895 }, { "epoch": 45.05, "learning_rate": 4.6981442330279544e-06, "loss": 0.1729, "step": 3900 }, { "epoch": 45.17, "learning_rate": 4.580690627202255e-06, "loss": 0.1781, "step": 3910 }, { "epoch": 45.28, "learning_rate": 4.463237021376556e-06, "loss": 0.1565, "step": 3920 }, { "epoch": 45.4, "learning_rate": 4.345783415550857e-06, "loss": 0.1445, "step": 3930 }, { "epoch": 45.51, "learning_rate": 4.228329809725159e-06, "loss": 0.1315, "step": 3940 }, { "epoch": 45.63, "learning_rate": 4.11087620389946e-06, "loss": 0.1578, "step": 3950 }, { "epoch": 45.74, "learning_rate": 3.993422598073761e-06, "loss": 0.1875, "step": 3960 }, { "epoch": 45.86, "learning_rate": 3.875968992248062e-06, "loss": 0.2048, "step": 3970 }, { "epoch": 45.97, "learning_rate": 3.7585153864223635e-06, "loss": 0.1825, "step": 3980 }, { "epoch": 46.0, "eval_accuracy": 0.974025974025974, "eval_loss": 0.09737637639045715, "eval_runtime": 8.2567, "eval_samples_per_second": 65.28, "eval_steps_per_second": 8.236, "step": 3982 }, { "epoch": 46.09, "learning_rate": 3.641061780596664e-06, "loss": 0.1715, "step": 3990 }, { "epoch": 46.2, "learning_rate": 3.5236081747709654e-06, "loss": 0.1679, "step": 4000 }, { "epoch": 46.32, "learning_rate": 3.406154568945267e-06, "loss": 0.1809, "step": 4010 }, { "epoch": 46.44, "learning_rate": 3.288700963119568e-06, "loss": 0.1582, "step": 4020 }, { "epoch": 46.55, "learning_rate": 3.1712473572938687e-06, "loss": 0.1497, "step": 4030 }, { "epoch": 46.67, "learning_rate": 3.05379375146817e-06, "loss": 0.1748, "step": 4040 }, { "epoch": 46.78, "learning_rate": 2.936340145642471e-06, "loss": 0.1893, "step": 4050 }, { "epoch": 46.9, "learning_rate": 2.8188865398167725e-06, "loss": 0.1664, "step": 4060 }, { "epoch": 46.99, "eval_accuracy": 0.9703153988868275, "eval_loss": 0.09385673701763153, "eval_runtime": 8.1532, "eval_samples_per_second": 66.109, "eval_steps_per_second": 8.34, "step": 4068 }, { "epoch": 47.01, "learning_rate": 2.7014329339910735e-06, "loss": 0.1701, "step": 4070 }, { "epoch": 47.13, "learning_rate": 2.583979328165375e-06, "loss": 0.1427, "step": 4080 }, { "epoch": 47.24, "learning_rate": 2.466525722339676e-06, "loss": 0.1303, "step": 4090 }, { "epoch": 47.36, "learning_rate": 2.3490721165139772e-06, "loss": 0.1459, "step": 4100 }, { "epoch": 47.48, "learning_rate": 2.231618510688278e-06, "loss": 0.1548, "step": 4110 }, { "epoch": 47.59, "learning_rate": 2.1141649048625796e-06, "loss": 0.1562, "step": 4120 }, { "epoch": 47.71, "learning_rate": 1.9967112990368805e-06, "loss": 0.1668, "step": 4130 }, { "epoch": 47.82, "learning_rate": 1.8792576932111817e-06, "loss": 0.1512, "step": 4140 }, { "epoch": 47.94, "learning_rate": 1.7618040873854827e-06, "loss": 0.1535, "step": 4150 }, { "epoch": 48.0, "eval_accuracy": 0.9721706864564007, "eval_loss": 0.09351829439401627, "eval_runtime": 8.1415, "eval_samples_per_second": 66.204, "eval_steps_per_second": 8.352, "step": 4155 }, { "epoch": 48.05, "learning_rate": 1.644350481559784e-06, "loss": 0.1624, "step": 4160 }, { "epoch": 48.17, "learning_rate": 1.526896875734085e-06, "loss": 0.1332, "step": 4170 }, { "epoch": 48.28, "learning_rate": 1.4094432699083862e-06, "loss": 0.1957, "step": 4180 }, { "epoch": 48.4, "learning_rate": 1.2919896640826874e-06, "loss": 0.139, "step": 4190 }, { "epoch": 48.51, "learning_rate": 1.1745360582569886e-06, "loss": 0.1589, "step": 4200 }, { "epoch": 48.63, "learning_rate": 1.0570824524312898e-06, "loss": 0.1696, "step": 4210 }, { "epoch": 48.75, "learning_rate": 9.396288466055909e-07, "loss": 0.1712, "step": 4220 }, { "epoch": 48.86, "learning_rate": 8.22175240779892e-07, "loss": 0.1562, "step": 4230 }, { "epoch": 48.98, "learning_rate": 7.047216349541931e-07, "loss": 0.1801, "step": 4240 }, { "epoch": 49.0, "eval_accuracy": 0.9703153988868275, "eval_loss": 0.09990726411342621, "eval_runtime": 8.1674, "eval_samples_per_second": 65.994, "eval_steps_per_second": 8.326, "step": 4242 }, { "epoch": 49.09, "learning_rate": 5.872680291284943e-07, "loss": 0.173, "step": 4250 }, { "epoch": 49.21, "learning_rate": 4.6981442330279543e-07, "loss": 0.1455, "step": 4260 }, { "epoch": 49.32, "learning_rate": 3.5236081747709656e-07, "loss": 0.1765, "step": 4270 }, { "epoch": 49.44, "learning_rate": 2.3490721165139772e-07, "loss": 0.1866, "step": 4280 }, { "epoch": 49.55, "learning_rate": 1.1745360582569886e-07, "loss": 0.1522, "step": 4290 }, { "epoch": 49.67, "learning_rate": 0.0, "loss": 0.1502, "step": 4300 }, { "epoch": 49.67, "eval_accuracy": 0.9703153988868275, "eval_loss": 0.19585400819778442, "eval_runtime": 8.3207, "eval_samples_per_second": 64.779, "eval_steps_per_second": 8.172, "step": 4300 }, { "epoch": 49.67, "step": 4300, "total_flos": 5.11036354111998e+18, "train_loss": 0.23105980243793753, "train_runtime": 6101.6743, "train_samples_per_second": 39.702, "train_steps_per_second": 0.705 } ], "max_steps": 4300, "num_train_epochs": 50, "total_flos": 5.11036354111998e+18, "trial_name": null, "trial_params": null }