{ "best_metric": 0.5296002626419067, "best_model_checkpoint": "./vit-base-16-thesis-demo-HAM10000/checkpoint-1300", "epoch": 4.0, "eval_steps": 50, "global_step": 1736, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 0.00019884792626728113, "loss": 1.8769, "step": 10 }, { "epoch": 0.05, "learning_rate": 0.00019769585253456222, "loss": 1.6987, "step": 20 }, { "epoch": 0.07, "learning_rate": 0.00019654377880184333, "loss": 1.5002, "step": 30 }, { "epoch": 0.09, "learning_rate": 0.00019539170506912442, "loss": 1.4262, "step": 40 }, { "epoch": 0.12, "learning_rate": 0.00019423963133640554, "loss": 1.4855, "step": 50 }, { "epoch": 0.12, "eval_accuracy": 0.5093457943925234, "eval_f1": 0.5093457943925234, "eval_loss": 1.351930856704712, "eval_precision": 0.5093457943925234, "eval_recall": 0.5093457943925234, "eval_runtime": 22.9282, "eval_samples_per_second": 65.335, "eval_steps_per_second": 8.2, "step": 50 }, { "epoch": 0.14, "learning_rate": 0.00019308755760368663, "loss": 1.2274, "step": 60 }, { "epoch": 0.16, "learning_rate": 0.00019193548387096775, "loss": 1.1676, "step": 70 }, { "epoch": 0.18, "learning_rate": 0.00019078341013824886, "loss": 1.0299, "step": 80 }, { "epoch": 0.21, "learning_rate": 0.00018963133640552998, "loss": 1.192, "step": 90 }, { "epoch": 0.23, "learning_rate": 0.00018847926267281107, "loss": 1.044, "step": 100 }, { "epoch": 0.23, "eval_accuracy": 0.6268357810413885, "eval_f1": 0.6268357810413885, "eval_loss": 1.0514885187149048, "eval_precision": 0.6268357810413885, "eval_recall": 0.6268357810413885, "eval_runtime": 23.586, "eval_samples_per_second": 63.512, "eval_steps_per_second": 7.971, "step": 100 }, { "epoch": 0.25, "learning_rate": 0.00018732718894009219, "loss": 1.1791, "step": 110 }, { "epoch": 0.28, "learning_rate": 0.00018617511520737328, "loss": 0.9907, "step": 120 }, { "epoch": 0.3, "learning_rate": 0.0001850230414746544, "loss": 0.9862, "step": 130 }, { "epoch": 0.32, "learning_rate": 0.00018387096774193548, "loss": 0.9692, "step": 140 }, { "epoch": 0.35, "learning_rate": 0.0001827188940092166, "loss": 1.0774, "step": 150 }, { "epoch": 0.35, "eval_accuracy": 0.568090787716956, "eval_f1": 0.568090787716956, "eval_loss": 1.2103569507598877, "eval_precision": 0.568090787716956, "eval_recall": 0.568090787716956, "eval_runtime": 23.5075, "eval_samples_per_second": 63.724, "eval_steps_per_second": 7.997, "step": 150 }, { "epoch": 0.37, "learning_rate": 0.0001815668202764977, "loss": 1.0483, "step": 160 }, { "epoch": 0.39, "learning_rate": 0.0001804147465437788, "loss": 0.8997, "step": 170 }, { "epoch": 0.41, "learning_rate": 0.0001792626728110599, "loss": 0.9513, "step": 180 }, { "epoch": 0.44, "learning_rate": 0.000178110599078341, "loss": 0.9172, "step": 190 }, { "epoch": 0.46, "learning_rate": 0.00017695852534562213, "loss": 0.9508, "step": 200 }, { "epoch": 0.46, "eval_accuracy": 0.6061415220293725, "eval_f1": 0.6061415220293725, "eval_loss": 1.0623714923858643, "eval_precision": 0.6061415220293725, "eval_recall": 0.6061415220293725, "eval_runtime": 23.18, "eval_samples_per_second": 64.625, "eval_steps_per_second": 8.11, "step": 200 }, { "epoch": 0.48, "learning_rate": 0.00017580645161290325, "loss": 0.7999, "step": 210 }, { "epoch": 0.51, "learning_rate": 0.00017465437788018436, "loss": 0.8259, "step": 220 }, { "epoch": 0.53, "learning_rate": 0.00017350230414746545, "loss": 0.6937, "step": 230 }, { "epoch": 0.55, "learning_rate": 0.00017235023041474657, "loss": 0.7381, "step": 240 }, { "epoch": 0.58, "learning_rate": 0.00017119815668202766, "loss": 0.9522, "step": 250 }, { "epoch": 0.58, "eval_accuracy": 0.6448598130841121, "eval_f1": 0.6448598130841121, "eval_loss": 0.9337974786758423, "eval_precision": 0.6448598130841121, "eval_recall": 0.6448598130841121, "eval_runtime": 23.4699, "eval_samples_per_second": 63.826, "eval_steps_per_second": 8.01, "step": 250 }, { "epoch": 0.6, "learning_rate": 0.00017004608294930878, "loss": 0.964, "step": 260 }, { "epoch": 0.62, "learning_rate": 0.00016889400921658987, "loss": 0.9078, "step": 270 }, { "epoch": 0.65, "learning_rate": 0.00016774193548387098, "loss": 0.8004, "step": 280 }, { "epoch": 0.67, "learning_rate": 0.00016658986175115207, "loss": 0.8192, "step": 290 }, { "epoch": 0.69, "learning_rate": 0.0001654377880184332, "loss": 0.774, "step": 300 }, { "epoch": 0.69, "eval_accuracy": 0.6675567423230975, "eval_f1": 0.6675567423230975, "eval_loss": 0.8939006328582764, "eval_precision": 0.6675567423230975, "eval_recall": 0.6675567423230975, "eval_runtime": 22.8187, "eval_samples_per_second": 65.648, "eval_steps_per_second": 8.239, "step": 300 }, { "epoch": 0.71, "learning_rate": 0.00016428571428571428, "loss": 0.6929, "step": 310 }, { "epoch": 0.74, "learning_rate": 0.0001631336405529954, "loss": 0.8671, "step": 320 }, { "epoch": 0.76, "learning_rate": 0.00016198156682027649, "loss": 0.843, "step": 330 }, { "epoch": 0.78, "learning_rate": 0.0001608294930875576, "loss": 0.7337, "step": 340 }, { "epoch": 0.81, "learning_rate": 0.00015967741935483872, "loss": 0.7675, "step": 350 }, { "epoch": 0.81, "eval_accuracy": 0.7182910547396528, "eval_f1": 0.7182910547396528, "eval_loss": 0.774177074432373, "eval_precision": 0.7182910547396528, "eval_recall": 0.7182910547396528, "eval_runtime": 23.9091, "eval_samples_per_second": 62.654, "eval_steps_per_second": 7.863, "step": 350 }, { "epoch": 0.83, "learning_rate": 0.00015852534562211984, "loss": 0.7298, "step": 360 }, { "epoch": 0.85, "learning_rate": 0.00015737327188940093, "loss": 0.716, "step": 370 }, { "epoch": 0.88, "learning_rate": 0.00015622119815668204, "loss": 0.6206, "step": 380 }, { "epoch": 0.9, "learning_rate": 0.00015506912442396313, "loss": 0.8554, "step": 390 }, { "epoch": 0.92, "learning_rate": 0.00015391705069124425, "loss": 0.7167, "step": 400 }, { "epoch": 0.92, "eval_accuracy": 0.7216288384512684, "eval_f1": 0.7216288384512684, "eval_loss": 0.7695363163948059, "eval_precision": 0.7216288384512684, "eval_recall": 0.7216288384512684, "eval_runtime": 23.7595, "eval_samples_per_second": 63.049, "eval_steps_per_second": 7.913, "step": 400 }, { "epoch": 0.94, "learning_rate": 0.00015276497695852537, "loss": 0.8005, "step": 410 }, { "epoch": 0.97, "learning_rate": 0.00015161290322580646, "loss": 0.6622, "step": 420 }, { "epoch": 0.99, "learning_rate": 0.00015046082949308757, "loss": 0.7918, "step": 430 }, { "epoch": 1.01, "learning_rate": 0.00014930875576036866, "loss": 0.5941, "step": 440 }, { "epoch": 1.04, "learning_rate": 0.00014815668202764978, "loss": 0.5204, "step": 450 }, { "epoch": 1.04, "eval_accuracy": 0.7303070761014686, "eval_f1": 0.7303070761014686, "eval_loss": 0.8005053400993347, "eval_precision": 0.7303070761014686, "eval_recall": 0.7303070761014686, "eval_runtime": 23.4092, "eval_samples_per_second": 63.992, "eval_steps_per_second": 8.031, "step": 450 }, { "epoch": 1.06, "learning_rate": 0.00014700460829493087, "loss": 0.6149, "step": 460 }, { "epoch": 1.08, "learning_rate": 0.00014585253456221199, "loss": 0.459, "step": 470 }, { "epoch": 1.11, "learning_rate": 0.0001447004608294931, "loss": 0.5677, "step": 480 }, { "epoch": 1.13, "learning_rate": 0.00014354838709677422, "loss": 0.5199, "step": 490 }, { "epoch": 1.15, "learning_rate": 0.0001423963133640553, "loss": 0.456, "step": 500 }, { "epoch": 1.15, "eval_accuracy": 0.6902536715620827, "eval_f1": 0.6902536715620827, "eval_loss": 0.8523401618003845, "eval_precision": 0.6902536715620827, "eval_recall": 0.6902536715620827, "eval_runtime": 24.2665, "eval_samples_per_second": 61.731, "eval_steps_per_second": 7.747, "step": 500 }, { "epoch": 1.18, "learning_rate": 0.00014124423963133643, "loss": 0.5875, "step": 510 }, { "epoch": 1.2, "learning_rate": 0.00014009216589861752, "loss": 0.4803, "step": 520 }, { "epoch": 1.22, "learning_rate": 0.00013894009216589863, "loss": 0.6385, "step": 530 }, { "epoch": 1.24, "learning_rate": 0.00013778801843317972, "loss": 0.5299, "step": 540 }, { "epoch": 1.27, "learning_rate": 0.00013663594470046084, "loss": 0.5421, "step": 550 }, { "epoch": 1.27, "eval_accuracy": 0.7543391188251002, "eval_f1": 0.7543391188251003, "eval_loss": 0.6753360629081726, "eval_precision": 0.7543391188251002, "eval_recall": 0.7543391188251002, "eval_runtime": 24.4291, "eval_samples_per_second": 61.32, "eval_steps_per_second": 7.696, "step": 550 }, { "epoch": 1.29, "learning_rate": 0.00013548387096774193, "loss": 0.4458, "step": 560 }, { "epoch": 1.31, "learning_rate": 0.00013433179723502305, "loss": 0.5533, "step": 570 }, { "epoch": 1.34, "learning_rate": 0.00013317972350230414, "loss": 0.4426, "step": 580 }, { "epoch": 1.36, "learning_rate": 0.00013202764976958525, "loss": 0.4254, "step": 590 }, { "epoch": 1.38, "learning_rate": 0.00013087557603686637, "loss": 0.4446, "step": 600 }, { "epoch": 1.38, "eval_accuracy": 0.7810413885180241, "eval_f1": 0.7810413885180241, "eval_loss": 0.6042020320892334, "eval_precision": 0.7810413885180241, "eval_recall": 0.7810413885180241, "eval_runtime": 24.5813, "eval_samples_per_second": 60.941, "eval_steps_per_second": 7.648, "step": 600 }, { "epoch": 1.41, "learning_rate": 0.00012972350230414746, "loss": 0.5428, "step": 610 }, { "epoch": 1.43, "learning_rate": 0.00012857142857142858, "loss": 0.5215, "step": 620 }, { "epoch": 1.45, "learning_rate": 0.0001274193548387097, "loss": 0.4636, "step": 630 }, { "epoch": 1.47, "learning_rate": 0.0001262672811059908, "loss": 0.5301, "step": 640 }, { "epoch": 1.5, "learning_rate": 0.0001251152073732719, "loss": 0.455, "step": 650 }, { "epoch": 1.5, "eval_accuracy": 0.7409879839786382, "eval_f1": 0.7409879839786382, "eval_loss": 0.6912689208984375, "eval_precision": 0.7409879839786382, "eval_recall": 0.7409879839786382, "eval_runtime": 24.0409, "eval_samples_per_second": 62.31, "eval_steps_per_second": 7.82, "step": 650 }, { "epoch": 1.52, "learning_rate": 0.00012396313364055302, "loss": 0.6054, "step": 660 }, { "epoch": 1.54, "learning_rate": 0.0001228110599078341, "loss": 0.4855, "step": 670 }, { "epoch": 1.57, "learning_rate": 0.00012165898617511522, "loss": 0.4318, "step": 680 }, { "epoch": 1.59, "learning_rate": 0.00012050691244239631, "loss": 0.5567, "step": 690 }, { "epoch": 1.61, "learning_rate": 0.00011935483870967743, "loss": 0.4175, "step": 700 }, { "epoch": 1.61, "eval_accuracy": 0.7810413885180241, "eval_f1": 0.7810413885180241, "eval_loss": 0.6142333745956421, "eval_precision": 0.7810413885180241, "eval_recall": 0.7810413885180241, "eval_runtime": 24.8763, "eval_samples_per_second": 60.218, "eval_steps_per_second": 7.557, "step": 700 }, { "epoch": 1.64, "learning_rate": 0.00011820276497695852, "loss": 0.6193, "step": 710 }, { "epoch": 1.66, "learning_rate": 0.00011705069124423964, "loss": 0.4553, "step": 720 }, { "epoch": 1.68, "learning_rate": 0.00011589861751152074, "loss": 0.4813, "step": 730 }, { "epoch": 1.71, "learning_rate": 0.00011474654377880186, "loss": 0.4984, "step": 740 }, { "epoch": 1.73, "learning_rate": 0.00011359447004608295, "loss": 0.3626, "step": 750 }, { "epoch": 1.73, "eval_accuracy": 0.8004005340453939, "eval_f1": 0.8004005340453939, "eval_loss": 0.5830577611923218, "eval_precision": 0.8004005340453939, "eval_recall": 0.8004005340453939, "eval_runtime": 24.5527, "eval_samples_per_second": 61.012, "eval_steps_per_second": 7.657, "step": 750 }, { "epoch": 1.75, "learning_rate": 0.00011244239631336406, "loss": 0.3285, "step": 760 }, { "epoch": 1.77, "learning_rate": 0.00011140552995391706, "loss": 0.5726, "step": 770 }, { "epoch": 1.8, "learning_rate": 0.00011025345622119817, "loss": 0.4853, "step": 780 }, { "epoch": 1.82, "learning_rate": 0.00010910138248847928, "loss": 0.5274, "step": 790 }, { "epoch": 1.84, "learning_rate": 0.00010794930875576037, "loss": 0.4816, "step": 800 }, { "epoch": 1.84, "eval_accuracy": 0.7890520694259012, "eval_f1": 0.7890520694259012, "eval_loss": 0.5586402416229248, "eval_precision": 0.7890520694259012, "eval_recall": 0.7890520694259012, "eval_runtime": 25.1015, "eval_samples_per_second": 59.678, "eval_steps_per_second": 7.49, "step": 800 }, { "epoch": 1.87, "learning_rate": 0.00010679723502304149, "loss": 0.4794, "step": 810 }, { "epoch": 1.89, "learning_rate": 0.00010564516129032258, "loss": 0.418, "step": 820 }, { "epoch": 1.91, "learning_rate": 0.0001044930875576037, "loss": 0.3756, "step": 830 }, { "epoch": 1.94, "learning_rate": 0.00010334101382488478, "loss": 0.5326, "step": 840 }, { "epoch": 1.96, "learning_rate": 0.0001021889400921659, "loss": 0.3257, "step": 850 }, { "epoch": 1.96, "eval_accuracy": 0.7990654205607477, "eval_f1": 0.7990654205607477, "eval_loss": 0.5758947134017944, "eval_precision": 0.7990654205607477, "eval_recall": 0.7990654205607477, "eval_runtime": 23.5154, "eval_samples_per_second": 63.703, "eval_steps_per_second": 7.995, "step": 850 }, { "epoch": 1.98, "learning_rate": 0.000101036866359447, "loss": 0.3173, "step": 860 }, { "epoch": 2.0, "learning_rate": 9.988479262672812e-05, "loss": 0.3198, "step": 870 }, { "epoch": 2.03, "learning_rate": 9.873271889400923e-05, "loss": 0.2706, "step": 880 }, { "epoch": 2.05, "learning_rate": 9.758064516129033e-05, "loss": 0.2695, "step": 890 }, { "epoch": 2.07, "learning_rate": 9.642857142857143e-05, "loss": 0.3111, "step": 900 }, { "epoch": 2.07, "eval_accuracy": 0.7930574098798397, "eval_f1": 0.7930574098798397, "eval_loss": 0.6100362539291382, "eval_precision": 0.7930574098798397, "eval_recall": 0.7930574098798397, "eval_runtime": 22.7882, "eval_samples_per_second": 65.736, "eval_steps_per_second": 8.25, "step": 900 }, { "epoch": 2.1, "learning_rate": 9.527649769585254e-05, "loss": 0.2007, "step": 910 }, { "epoch": 2.12, "learning_rate": 9.412442396313365e-05, "loss": 0.2584, "step": 920 }, { "epoch": 2.14, "learning_rate": 9.297235023041476e-05, "loss": 0.257, "step": 930 }, { "epoch": 2.17, "learning_rate": 9.182027649769586e-05, "loss": 0.3709, "step": 940 }, { "epoch": 2.19, "learning_rate": 9.066820276497696e-05, "loss": 0.2052, "step": 950 }, { "epoch": 2.19, "eval_accuracy": 0.8110814419225634, "eval_f1": 0.8110814419225634, "eval_loss": 0.5674178600311279, "eval_precision": 0.8110814419225634, "eval_recall": 0.8110814419225634, "eval_runtime": 23.6606, "eval_samples_per_second": 63.312, "eval_steps_per_second": 7.946, "step": 950 }, { "epoch": 2.21, "learning_rate": 8.951612903225806e-05, "loss": 0.1869, "step": 960 }, { "epoch": 2.24, "learning_rate": 8.836405529953917e-05, "loss": 0.3195, "step": 970 }, { "epoch": 2.26, "learning_rate": 8.721198156682027e-05, "loss": 0.3406, "step": 980 }, { "epoch": 2.28, "learning_rate": 8.605990783410139e-05, "loss": 0.26, "step": 990 }, { "epoch": 2.3, "learning_rate": 8.490783410138249e-05, "loss": 0.2273, "step": 1000 }, { "epoch": 2.3, "eval_accuracy": 0.80173564753004, "eval_f1": 0.8017356475300401, "eval_loss": 0.5974730253219604, "eval_precision": 0.80173564753004, "eval_recall": 0.80173564753004, "eval_runtime": 22.7233, "eval_samples_per_second": 65.923, "eval_steps_per_second": 8.273, "step": 1000 }, { "epoch": 2.33, "learning_rate": 8.37557603686636e-05, "loss": 0.2222, "step": 1010 }, { "epoch": 2.35, "learning_rate": 8.26036866359447e-05, "loss": 0.2741, "step": 1020 }, { "epoch": 2.37, "learning_rate": 8.145161290322582e-05, "loss": 0.2503, "step": 1030 }, { "epoch": 2.4, "learning_rate": 8.029953917050692e-05, "loss": 0.2778, "step": 1040 }, { "epoch": 2.42, "learning_rate": 7.914746543778802e-05, "loss": 0.3007, "step": 1050 }, { "epoch": 2.42, "eval_accuracy": 0.8204272363150867, "eval_f1": 0.8204272363150867, "eval_loss": 0.5713711977005005, "eval_precision": 0.8204272363150867, "eval_recall": 0.8204272363150867, "eval_runtime": 23.4991, "eval_samples_per_second": 63.747, "eval_steps_per_second": 8.0, "step": 1050 }, { "epoch": 2.44, "learning_rate": 7.799539170506914e-05, "loss": 0.2108, "step": 1060 }, { "epoch": 2.47, "learning_rate": 7.684331797235024e-05, "loss": 0.2268, "step": 1070 }, { "epoch": 2.49, "learning_rate": 7.569124423963135e-05, "loss": 0.1861, "step": 1080 }, { "epoch": 2.51, "learning_rate": 7.453917050691245e-05, "loss": 0.2302, "step": 1090 }, { "epoch": 2.53, "learning_rate": 7.338709677419355e-05, "loss": 0.2812, "step": 1100 }, { "epoch": 2.53, "eval_accuracy": 0.8004005340453939, "eval_f1": 0.8004005340453939, "eval_loss": 0.6080873608589172, "eval_precision": 0.8004005340453939, "eval_recall": 0.8004005340453939, "eval_runtime": 23.4447, "eval_samples_per_second": 63.895, "eval_steps_per_second": 8.019, "step": 1100 }, { "epoch": 2.56, "learning_rate": 7.223502304147465e-05, "loss": 0.2486, "step": 1110 }, { "epoch": 2.58, "learning_rate": 7.108294930875576e-05, "loss": 0.1691, "step": 1120 }, { "epoch": 2.6, "learning_rate": 6.993087557603687e-05, "loss": 0.2579, "step": 1130 }, { "epoch": 2.63, "learning_rate": 6.877880184331798e-05, "loss": 0.2635, "step": 1140 }, { "epoch": 2.65, "learning_rate": 6.762672811059908e-05, "loss": 0.2661, "step": 1150 }, { "epoch": 2.65, "eval_accuracy": 0.822429906542056, "eval_f1": 0.822429906542056, "eval_loss": 0.5652973651885986, "eval_precision": 0.822429906542056, "eval_recall": 0.822429906542056, "eval_runtime": 23.861, "eval_samples_per_second": 62.78, "eval_steps_per_second": 7.879, "step": 1150 }, { "epoch": 2.67, "learning_rate": 6.647465437788018e-05, "loss": 0.3103, "step": 1160 }, { "epoch": 2.7, "learning_rate": 6.532258064516129e-05, "loss": 0.2789, "step": 1170 }, { "epoch": 2.72, "learning_rate": 6.417050691244239e-05, "loss": 0.1855, "step": 1180 }, { "epoch": 2.74, "learning_rate": 6.301843317972351e-05, "loss": 0.1857, "step": 1190 }, { "epoch": 2.76, "learning_rate": 6.186635944700461e-05, "loss": 0.1796, "step": 1200 }, { "epoch": 2.76, "eval_accuracy": 0.8337783711615487, "eval_f1": 0.8337783711615487, "eval_loss": 0.544723629951477, "eval_precision": 0.8337783711615487, "eval_recall": 0.8337783711615487, "eval_runtime": 23.9024, "eval_samples_per_second": 62.672, "eval_steps_per_second": 7.865, "step": 1200 }, { "epoch": 2.79, "learning_rate": 6.0714285714285715e-05, "loss": 0.1997, "step": 1210 }, { "epoch": 2.81, "learning_rate": 5.956221198156682e-05, "loss": 0.2206, "step": 1220 }, { "epoch": 2.83, "learning_rate": 5.8410138248847935e-05, "loss": 0.1756, "step": 1230 }, { "epoch": 2.86, "learning_rate": 5.725806451612904e-05, "loss": 0.232, "step": 1240 }, { "epoch": 2.88, "learning_rate": 5.610599078341015e-05, "loss": 0.1882, "step": 1250 }, { "epoch": 2.88, "eval_accuracy": 0.828437917222964, "eval_f1": 0.828437917222964, "eval_loss": 0.5357471704483032, "eval_precision": 0.828437917222964, "eval_recall": 0.828437917222964, "eval_runtime": 23.7377, "eval_samples_per_second": 63.106, "eval_steps_per_second": 7.92, "step": 1250 }, { "epoch": 2.9, "learning_rate": 5.495391705069125e-05, "loss": 0.1958, "step": 1260 }, { "epoch": 2.93, "learning_rate": 5.3801843317972355e-05, "loss": 0.1584, "step": 1270 }, { "epoch": 2.95, "learning_rate": 5.264976958525346e-05, "loss": 0.1785, "step": 1280 }, { "epoch": 2.97, "learning_rate": 5.149769585253457e-05, "loss": 0.144, "step": 1290 }, { "epoch": 3.0, "learning_rate": 5.034562211981567e-05, "loss": 0.1596, "step": 1300 }, { "epoch": 3.0, "eval_accuracy": 0.8344459279038718, "eval_f1": 0.834445927903872, "eval_loss": 0.5296002626419067, "eval_precision": 0.8344459279038718, "eval_recall": 0.8344459279038718, "eval_runtime": 23.9184, "eval_samples_per_second": 62.629, "eval_steps_per_second": 7.86, "step": 1300 }, { "epoch": 3.02, "learning_rate": 4.9193548387096775e-05, "loss": 0.0465, "step": 1310 }, { "epoch": 3.04, "learning_rate": 4.8041474654377885e-05, "loss": 0.1005, "step": 1320 }, { "epoch": 3.06, "learning_rate": 4.688940092165899e-05, "loss": 0.133, "step": 1330 }, { "epoch": 3.09, "learning_rate": 4.573732718894009e-05, "loss": 0.1057, "step": 1340 }, { "epoch": 3.11, "learning_rate": 4.45852534562212e-05, "loss": 0.075, "step": 1350 }, { "epoch": 3.11, "eval_accuracy": 0.8197596795727636, "eval_f1": 0.8197596795727636, "eval_loss": 0.5875993967056274, "eval_precision": 0.8197596795727636, "eval_recall": 0.8197596795727636, "eval_runtime": 24.1647, "eval_samples_per_second": 61.991, "eval_steps_per_second": 7.78, "step": 1350 }, { "epoch": 3.13, "learning_rate": 4.3433179723502305e-05, "loss": 0.0836, "step": 1360 }, { "epoch": 3.16, "learning_rate": 4.228110599078341e-05, "loss": 0.0834, "step": 1370 }, { "epoch": 3.18, "learning_rate": 4.112903225806452e-05, "loss": 0.1301, "step": 1380 }, { "epoch": 3.2, "learning_rate": 3.997695852534563e-05, "loss": 0.0711, "step": 1390 }, { "epoch": 3.23, "learning_rate": 3.882488479262673e-05, "loss": 0.1128, "step": 1400 }, { "epoch": 3.23, "eval_accuracy": 0.8337783711615487, "eval_f1": 0.8337783711615487, "eval_loss": 0.56122225522995, "eval_precision": 0.8337783711615487, "eval_recall": 0.8337783711615487, "eval_runtime": 24.1153, "eval_samples_per_second": 62.118, "eval_steps_per_second": 7.796, "step": 1400 }, { "epoch": 3.25, "learning_rate": 3.7672811059907835e-05, "loss": 0.0848, "step": 1410 }, { "epoch": 3.27, "learning_rate": 3.6520737327188945e-05, "loss": 0.0885, "step": 1420 }, { "epoch": 3.29, "learning_rate": 3.536866359447005e-05, "loss": 0.0432, "step": 1430 }, { "epoch": 3.32, "learning_rate": 3.421658986175115e-05, "loss": 0.0557, "step": 1440 }, { "epoch": 3.34, "learning_rate": 3.306451612903226e-05, "loss": 0.0677, "step": 1450 }, { "epoch": 3.34, "eval_accuracy": 0.8331108144192256, "eval_f1": 0.8331108144192256, "eval_loss": 0.5910764932632446, "eval_precision": 0.8331108144192256, "eval_recall": 0.8331108144192256, "eval_runtime": 22.9405, "eval_samples_per_second": 65.299, "eval_steps_per_second": 8.195, "step": 1450 }, { "epoch": 3.36, "learning_rate": 3.1912442396313365e-05, "loss": 0.0931, "step": 1460 }, { "epoch": 3.39, "learning_rate": 3.076036866359447e-05, "loss": 0.0276, "step": 1470 }, { "epoch": 3.41, "learning_rate": 2.960829493087558e-05, "loss": 0.0311, "step": 1480 }, { "epoch": 3.43, "learning_rate": 2.8456221198156685e-05, "loss": 0.0513, "step": 1490 }, { "epoch": 3.46, "learning_rate": 2.730414746543779e-05, "loss": 0.0794, "step": 1500 }, { "epoch": 3.46, "eval_accuracy": 0.8304405874499332, "eval_f1": 0.8304405874499331, "eval_loss": 0.5970631837844849, "eval_precision": 0.8304405874499332, "eval_recall": 0.8304405874499332, "eval_runtime": 24.072, "eval_samples_per_second": 62.23, "eval_steps_per_second": 7.81, "step": 1500 }, { "epoch": 3.48, "learning_rate": 2.6152073732718895e-05, "loss": 0.0655, "step": 1510 }, { "epoch": 3.5, "learning_rate": 2.5e-05, "loss": 0.0626, "step": 1520 }, { "epoch": 3.53, "learning_rate": 2.3847926267281108e-05, "loss": 0.0936, "step": 1530 }, { "epoch": 3.55, "learning_rate": 2.269585253456221e-05, "loss": 0.0507, "step": 1540 }, { "epoch": 3.57, "learning_rate": 2.154377880184332e-05, "loss": 0.0367, "step": 1550 }, { "epoch": 3.57, "eval_accuracy": 0.8377837116154874, "eval_f1": 0.8377837116154874, "eval_loss": 0.5634308457374573, "eval_precision": 0.8377837116154874, "eval_recall": 0.8377837116154874, "eval_runtime": 24.4701, "eval_samples_per_second": 61.218, "eval_steps_per_second": 7.683, "step": 1550 }, { "epoch": 3.59, "learning_rate": 2.0391705069124424e-05, "loss": 0.1449, "step": 1560 }, { "epoch": 3.62, "learning_rate": 1.923963133640553e-05, "loss": 0.0286, "step": 1570 }, { "epoch": 3.64, "learning_rate": 1.8087557603686638e-05, "loss": 0.0877, "step": 1580 }, { "epoch": 3.66, "learning_rate": 1.693548387096774e-05, "loss": 0.0332, "step": 1590 }, { "epoch": 3.69, "learning_rate": 1.578341013824885e-05, "loss": 0.0279, "step": 1600 }, { "epoch": 3.69, "eval_accuracy": 0.8391188251001335, "eval_f1": 0.8391188251001335, "eval_loss": 0.5674164891242981, "eval_precision": 0.8391188251001335, "eval_recall": 0.8391188251001335, "eval_runtime": 23.0964, "eval_samples_per_second": 64.859, "eval_steps_per_second": 8.14, "step": 1600 }, { "epoch": 3.71, "learning_rate": 1.4631336405529954e-05, "loss": 0.0474, "step": 1610 }, { "epoch": 3.73, "learning_rate": 1.3479262672811061e-05, "loss": 0.0546, "step": 1620 }, { "epoch": 3.76, "learning_rate": 1.2327188940092166e-05, "loss": 0.1128, "step": 1630 }, { "epoch": 3.78, "learning_rate": 1.1175115207373273e-05, "loss": 0.0242, "step": 1640 }, { "epoch": 3.8, "learning_rate": 1.0023041474654378e-05, "loss": 0.0216, "step": 1650 }, { "epoch": 3.8, "eval_accuracy": 0.835781041388518, "eval_f1": 0.835781041388518, "eval_loss": 0.5776631236076355, "eval_precision": 0.835781041388518, "eval_recall": 0.835781041388518, "eval_runtime": 23.9672, "eval_samples_per_second": 62.502, "eval_steps_per_second": 7.844, "step": 1650 }, { "epoch": 3.82, "learning_rate": 8.870967741935484e-06, "loss": 0.0495, "step": 1660 }, { "epoch": 3.85, "learning_rate": 7.71889400921659e-06, "loss": 0.0738, "step": 1670 }, { "epoch": 3.87, "learning_rate": 6.566820276497695e-06, "loss": 0.0212, "step": 1680 }, { "epoch": 3.89, "learning_rate": 5.414746543778802e-06, "loss": 0.0874, "step": 1690 }, { "epoch": 3.92, "learning_rate": 4.2626728110599085e-06, "loss": 0.0161, "step": 1700 }, { "epoch": 3.92, "eval_accuracy": 0.8437917222963952, "eval_f1": 0.8437917222963952, "eval_loss": 0.5607731342315674, "eval_precision": 0.8437917222963952, "eval_recall": 0.8437917222963952, "eval_runtime": 22.8075, "eval_samples_per_second": 65.68, "eval_steps_per_second": 8.243, "step": 1700 }, { "epoch": 3.94, "learning_rate": 3.110599078341014e-06, "loss": 0.018, "step": 1710 }, { "epoch": 3.96, "learning_rate": 1.9585253456221198e-06, "loss": 0.0698, "step": 1720 }, { "epoch": 3.99, "learning_rate": 8.064516129032258e-07, "loss": 0.0687, "step": 1730 }, { "epoch": 4.0, "step": 1736, "total_flos": 2.1525139607212524e+18, "train_loss": 0.438499038875927, "train_runtime": 1854.8343, "train_samples_per_second": 14.975, "train_steps_per_second": 0.936 } ], "logging_steps": 10, "max_steps": 1736, "num_train_epochs": 4, "save_steps": 50, "total_flos": 2.1525139607212524e+18, "trial_name": null, "trial_params": null }