{ "best_metric": 0.9793991416309012, "best_model_checkpoint": "swin-large-patch4-window7-224-in22k-finetuned-lora-medmnistv2/checkpoint-255", "epoch": 9.882352941176471, "eval_steps": 500, "global_step": 630, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16, "grad_norm": 0.9944203495979309, "learning_rate": 0.004920634920634921, "loss": 0.6411, "step": 10 }, { "epoch": 0.31, "grad_norm": 1.986366868019104, "learning_rate": 0.004841269841269842, "loss": 0.3966, "step": 20 }, { "epoch": 0.47, "grad_norm": 1.2478023767471313, "learning_rate": 0.0047619047619047615, "loss": 0.2835, "step": 30 }, { "epoch": 0.63, "grad_norm": 0.9372655749320984, "learning_rate": 0.004682539682539683, "loss": 0.2194, "step": 40 }, { "epoch": 0.78, "grad_norm": 0.834052562713623, "learning_rate": 0.004603174603174603, "loss": 0.4268, "step": 50 }, { "epoch": 0.94, "grad_norm": 0.8572702407836914, "learning_rate": 0.004523809523809524, "loss": 0.3305, "step": 60 }, { "epoch": 0.99, "eval_accuracy": 0.936480686695279, "eval_f1": 0.9119407558733402, "eval_loss": 0.16002865135669708, "eval_precision": 0.9478312410980476, "eval_recall": 0.886801607677731, "eval_runtime": 18.303, "eval_samples_per_second": 63.651, "eval_steps_per_second": 3.988, "step": 63 }, { "epoch": 1.1, "grad_norm": 1.092854619026184, "learning_rate": 0.0044444444444444444, "loss": 0.2775, "step": 70 }, { "epoch": 1.25, "grad_norm": 1.632826328277588, "learning_rate": 0.004365079365079365, "loss": 0.2432, "step": 80 }, { "epoch": 1.41, "grad_norm": 0.9421872496604919, "learning_rate": 0.004285714285714286, "loss": 0.2564, "step": 90 }, { "epoch": 1.57, "grad_norm": 3.025440216064453, "learning_rate": 0.004206349206349207, "loss": 0.2726, "step": 100 }, { "epoch": 1.73, "grad_norm": 0.8582188487052917, "learning_rate": 0.0041269841269841265, "loss": 0.2958, "step": 110 }, { "epoch": 1.88, "grad_norm": 1.093877911567688, "learning_rate": 0.004047619047619048, "loss": 0.2335, "step": 120 }, { "epoch": 1.99, "eval_accuracy": 0.9313304721030042, "eval_f1": 0.9166446055107573, "eval_loss": 0.15517598390579224, "eval_precision": 0.8967545322648443, "eval_recall": 0.9471582920716426, "eval_runtime": 18.7183, "eval_samples_per_second": 62.239, "eval_steps_per_second": 3.9, "step": 127 }, { "epoch": 2.04, "grad_norm": 1.588196873664856, "learning_rate": 0.003968253968253968, "loss": 0.1984, "step": 130 }, { "epoch": 2.2, "grad_norm": 0.8592113256454468, "learning_rate": 0.003896825396825397, "loss": 0.2741, "step": 140 }, { "epoch": 2.35, "grad_norm": 1.1290347576141357, "learning_rate": 0.003817460317460317, "loss": 0.2253, "step": 150 }, { "epoch": 2.51, "grad_norm": 0.8879281878471375, "learning_rate": 0.0037380952380952383, "loss": 0.2726, "step": 160 }, { "epoch": 2.67, "grad_norm": 0.8666319847106934, "learning_rate": 0.0036587301587301586, "loss": 0.2226, "step": 170 }, { "epoch": 2.82, "grad_norm": 1.2298210859298706, "learning_rate": 0.0035793650793650793, "loss": 0.1967, "step": 180 }, { "epoch": 2.98, "grad_norm": 0.7905568480491638, "learning_rate": 0.0034999999999999996, "loss": 0.1977, "step": 190 }, { "epoch": 3.0, "eval_accuracy": 0.9733905579399141, "eval_f1": 0.9659005824707372, "eval_loss": 0.08546662330627441, "eval_precision": 0.9607843137254902, "eval_recall": 0.9713575096277278, "eval_runtime": 18.7005, "eval_samples_per_second": 62.298, "eval_steps_per_second": 3.904, "step": 191 }, { "epoch": 3.14, "grad_norm": 0.9821394681930542, "learning_rate": 0.003420634920634921, "loss": 0.2158, "step": 200 }, { "epoch": 3.29, "grad_norm": 0.6182402968406677, "learning_rate": 0.003341269841269841, "loss": 0.1528, "step": 210 }, { "epoch": 3.45, "grad_norm": 0.6537771821022034, "learning_rate": 0.003261904761904762, "loss": 0.2038, "step": 220 }, { "epoch": 3.61, "grad_norm": 1.0976545810699463, "learning_rate": 0.0031825396825396826, "loss": 0.217, "step": 230 }, { "epoch": 3.76, "grad_norm": 0.581605076789856, "learning_rate": 0.0031031746031746034, "loss": 0.2044, "step": 240 }, { "epoch": 3.92, "grad_norm": 0.8219888806343079, "learning_rate": 0.0030238095238095237, "loss": 0.1746, "step": 250 }, { "epoch": 4.0, "eval_accuracy": 0.9793991416309012, "eval_f1": 0.972945356587167, "eval_loss": 0.0870010182261467, "eval_precision": 0.9794167490467448, "eval_recall": 0.9669104162846139, "eval_runtime": 18.7359, "eval_samples_per_second": 62.18, "eval_steps_per_second": 3.896, "step": 255 }, { "epoch": 4.08, "grad_norm": 0.45729899406433105, "learning_rate": 0.0029444444444444444, "loss": 0.1785, "step": 260 }, { "epoch": 4.24, "grad_norm": 0.9842467904090881, "learning_rate": 0.002865079365079365, "loss": 0.1507, "step": 270 }, { "epoch": 4.39, "grad_norm": 2.326084852218628, "learning_rate": 0.002785714285714286, "loss": 0.2188, "step": 280 }, { "epoch": 4.55, "grad_norm": 0.7227071523666382, "learning_rate": 0.002706349206349206, "loss": 0.1853, "step": 290 }, { "epoch": 4.71, "grad_norm": 1.1356016397476196, "learning_rate": 0.002626984126984127, "loss": 0.1671, "step": 300 }, { "epoch": 4.86, "grad_norm": 0.6211657524108887, "learning_rate": 0.0025476190476190477, "loss": 0.1797, "step": 310 }, { "epoch": 4.99, "eval_accuracy": 0.9699570815450643, "eval_f1": 0.961658208434536, "eval_loss": 0.0828637108206749, "eval_precision": 0.95490488153731, "eval_recall": 0.969034629256067, "eval_runtime": 18.2407, "eval_samples_per_second": 63.868, "eval_steps_per_second": 4.002, "step": 318 }, { "epoch": 5.02, "grad_norm": 0.3646533787250519, "learning_rate": 0.0024682539682539684, "loss": 0.1636, "step": 320 }, { "epoch": 5.18, "grad_norm": 0.5121908187866211, "learning_rate": 0.002388888888888889, "loss": 0.1463, "step": 330 }, { "epoch": 5.33, "grad_norm": 0.34603381156921387, "learning_rate": 0.0023095238095238095, "loss": 0.1284, "step": 340 }, { "epoch": 5.49, "grad_norm": 1.5245040655136108, "learning_rate": 0.0022301587301587302, "loss": 0.1307, "step": 350 }, { "epoch": 5.65, "grad_norm": 1.0758203268051147, "learning_rate": 0.002150793650793651, "loss": 0.1912, "step": 360 }, { "epoch": 5.8, "grad_norm": 0.9437044858932495, "learning_rate": 0.0020714285714285717, "loss": 0.1942, "step": 370 }, { "epoch": 5.96, "grad_norm": 0.6630730628967285, "learning_rate": 0.001992063492063492, "loss": 0.1436, "step": 380 }, { "epoch": 5.99, "eval_accuracy": 0.9708154506437768, "eval_f1": 0.9627915802446869, "eval_loss": 0.0797128826379776, "eval_precision": 0.9556120562130177, "eval_recall": 0.9706793660981723, "eval_runtime": 18.3826, "eval_samples_per_second": 63.375, "eval_steps_per_second": 3.971, "step": 382 }, { "epoch": 6.12, "grad_norm": 0.5699043869972229, "learning_rate": 0.0019126984126984126, "loss": 0.1704, "step": 390 }, { "epoch": 6.27, "grad_norm": 0.6287882328033447, "learning_rate": 0.0018333333333333333, "loss": 0.1034, "step": 400 }, { "epoch": 6.43, "grad_norm": 0.4099302589893341, "learning_rate": 0.0017539682539682538, "loss": 0.1633, "step": 410 }, { "epoch": 6.59, "grad_norm": 0.24668000638484955, "learning_rate": 0.0016746031746031746, "loss": 0.1477, "step": 420 }, { "epoch": 6.75, "grad_norm": 0.6959215998649597, "learning_rate": 0.001595238095238095, "loss": 0.1578, "step": 430 }, { "epoch": 6.9, "grad_norm": 0.4429934620857239, "learning_rate": 0.0015158730158730158, "loss": 0.1632, "step": 440 }, { "epoch": 7.0, "eval_accuracy": 0.9699570815450643, "eval_f1": 0.9621164611060785, "eval_loss": 0.08162170648574829, "eval_precision": 0.9507741239032943, "eval_recall": 0.9754187297512072, "eval_runtime": 18.5501, "eval_samples_per_second": 62.803, "eval_steps_per_second": 3.935, "step": 446 }, { "epoch": 7.06, "grad_norm": 0.3358498513698578, "learning_rate": 0.0014365079365079364, "loss": 0.1628, "step": 450 }, { "epoch": 7.22, "grad_norm": 1.001493215560913, "learning_rate": 0.0013571428571428571, "loss": 0.1495, "step": 460 }, { "epoch": 7.37, "grad_norm": 1.0969635248184204, "learning_rate": 0.0012777777777777776, "loss": 0.1972, "step": 470 }, { "epoch": 7.53, "grad_norm": 0.4329684376716614, "learning_rate": 0.0011984126984126984, "loss": 0.1538, "step": 480 }, { "epoch": 7.69, "grad_norm": 0.426087886095047, "learning_rate": 0.0011190476190476191, "loss": 0.1291, "step": 490 }, { "epoch": 7.84, "grad_norm": 0.24835826456546783, "learning_rate": 0.0010396825396825396, "loss": 0.1301, "step": 500 }, { "epoch": 8.0, "grad_norm": 0.3314363956451416, "learning_rate": 0.0009603174603174604, "loss": 0.1125, "step": 510 }, { "epoch": 8.0, "eval_accuracy": 0.9613733905579399, "eval_f1": 0.9519463889441405, "eval_loss": 0.10071194916963577, "eval_precision": 0.9364616472251679, "eval_recall": 0.9717395623204352, "eval_runtime": 18.4952, "eval_samples_per_second": 62.989, "eval_steps_per_second": 3.947, "step": 510 }, { "epoch": 8.16, "grad_norm": 0.4465714693069458, "learning_rate": 0.000880952380952381, "loss": 0.1279, "step": 520 }, { "epoch": 8.31, "grad_norm": 0.7344756126403809, "learning_rate": 0.0008015873015873017, "loss": 0.1408, "step": 530 }, { "epoch": 8.47, "grad_norm": 2.2782132625579834, "learning_rate": 0.0007222222222222222, "loss": 0.134, "step": 540 }, { "epoch": 8.63, "grad_norm": 0.8408999443054199, "learning_rate": 0.0006428571428571428, "loss": 0.1193, "step": 550 }, { "epoch": 8.78, "grad_norm": 0.4357975721359253, "learning_rate": 0.0005634920634920636, "loss": 0.114, "step": 560 }, { "epoch": 8.94, "grad_norm": 0.6790524125099182, "learning_rate": 0.00048412698412698415, "loss": 0.1076, "step": 570 }, { "epoch": 8.99, "eval_accuracy": 0.9690987124463519, "eval_f1": 0.9612241124260354, "eval_loss": 0.09004171937704086, "eval_precision": 0.9481984892871503, "eval_recall": 0.9769660431566722, "eval_runtime": 18.3674, "eval_samples_per_second": 63.427, "eval_steps_per_second": 3.974, "step": 573 }, { "epoch": 9.1, "grad_norm": 0.38498663902282715, "learning_rate": 0.0004047619047619048, "loss": 0.1174, "step": 580 }, { "epoch": 9.25, "grad_norm": 0.7055544853210449, "learning_rate": 0.0003253968253968254, "loss": 0.0951, "step": 590 }, { "epoch": 9.41, "grad_norm": 0.8157325387001038, "learning_rate": 0.00024603174603174605, "loss": 0.1397, "step": 600 }, { "epoch": 9.57, "grad_norm": 0.6511685252189636, "learning_rate": 0.00016666666666666666, "loss": 0.1296, "step": 610 }, { "epoch": 9.73, "grad_norm": 0.5405935049057007, "learning_rate": 8.730158730158731e-05, "loss": 0.1028, "step": 620 }, { "epoch": 9.88, "grad_norm": 0.472501665353775, "learning_rate": 7.936507936507936e-06, "loss": 0.1188, "step": 630 }, { "epoch": 9.88, "eval_accuracy": 0.9622317596566523, "eval_f1": 0.9529697156530292, "eval_loss": 0.10635051876306534, "eval_precision": 0.9377354615755036, "eval_recall": 0.9723202824133504, "eval_runtime": 18.4162, "eval_samples_per_second": 63.26, "eval_steps_per_second": 3.964, "step": 630 }, { "epoch": 9.88, "step": 630, "total_flos": 7.138406630605308e+18, "train_loss": 0.19363035304205758, "train_runtime": 1305.2945, "train_samples_per_second": 31.234, "train_steps_per_second": 0.483 } ], "logging_steps": 10, "max_steps": 630, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 7.138406630605308e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }