{ "best_metric": 0.975107296137339, "best_model_checkpoint": "deit-base-patch16-224-finetuned-lora-medmnistv2/checkpoint-446", "epoch": 9.882352941176471, "eval_steps": 500, "global_step": 630, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16, "grad_norm": 1.1300164461135864, "learning_rate": 0.004920634920634921, "loss": 0.4737, "step": 10 }, { "epoch": 0.31, "grad_norm": 0.7213650941848755, "learning_rate": 0.004841269841269842, "loss": 0.6061, "step": 20 }, { "epoch": 0.47, "grad_norm": 3.0624804496765137, "learning_rate": 0.0047619047619047615, "loss": 0.2754, "step": 30 }, { "epoch": 0.63, "grad_norm": 0.509834885597229, "learning_rate": 0.004682539682539683, "loss": 0.2462, "step": 40 }, { "epoch": 0.78, "grad_norm": 1.2371031045913696, "learning_rate": 0.004603174603174603, "loss": 0.205, "step": 50 }, { "epoch": 0.94, "grad_norm": 1.2106962203979492, "learning_rate": 0.004523809523809524, "loss": 0.2556, "step": 60 }, { "epoch": 0.99, "eval_accuracy": 0.9184549356223176, "eval_f1": 0.8822492390220755, "eval_loss": 0.20187422633171082, "eval_precision": 0.9446022031823746, "eval_recall": 0.8469420502475702, "eval_runtime": 11.2928, "eval_samples_per_second": 103.163, "eval_steps_per_second": 6.464, "step": 63 }, { "epoch": 1.1, "grad_norm": 2.5079874992370605, "learning_rate": 0.0044444444444444444, "loss": 0.282, "step": 70 }, { "epoch": 1.25, "grad_norm": 0.6824792623519897, "learning_rate": 0.004365079365079365, "loss": 0.2291, "step": 80 }, { "epoch": 1.41, "grad_norm": 1.294320821762085, "learning_rate": 0.004285714285714286, "loss": 0.208, "step": 90 }, { "epoch": 1.57, "grad_norm": 2.1044976711273193, "learning_rate": 0.004206349206349207, "loss": 0.2288, "step": 100 }, { "epoch": 1.73, "grad_norm": 0.8730837106704712, "learning_rate": 0.0041269841269841265, "loss": 0.3071, "step": 110 }, { "epoch": 1.88, "grad_norm": 1.6219590902328491, "learning_rate": 0.004047619047619048, "loss": 0.2302, "step": 120 }, { "epoch": 1.99, "eval_accuracy": 0.9613733905579399, "eval_f1": 0.9513881612438695, "eval_loss": 0.10982219129800797, "eval_precision": 0.9396367521367521, "eval_recall": 0.965355461825295, "eval_runtime": 11.2742, "eval_samples_per_second": 103.333, "eval_steps_per_second": 6.475, "step": 127 }, { "epoch": 2.04, "grad_norm": 1.7205462455749512, "learning_rate": 0.003968253968253968, "loss": 0.214, "step": 130 }, { "epoch": 2.2, "grad_norm": 0.4880439341068268, "learning_rate": 0.003888888888888889, "loss": 0.2757, "step": 140 }, { "epoch": 2.35, "grad_norm": 1.5420565605163574, "learning_rate": 0.0038095238095238095, "loss": 0.2314, "step": 150 }, { "epoch": 2.51, "grad_norm": 0.872795820236206, "learning_rate": 0.0037301587301587303, "loss": 0.2689, "step": 160 }, { "epoch": 2.67, "grad_norm": 1.3670737743377686, "learning_rate": 0.0036507936507936506, "loss": 0.1787, "step": 170 }, { "epoch": 2.82, "grad_norm": 0.9697684049606323, "learning_rate": 0.0035714285714285718, "loss": 0.1962, "step": 180 }, { "epoch": 2.98, "grad_norm": 0.6434329152107239, "learning_rate": 0.003492063492063492, "loss": 0.2258, "step": 190 }, { "epoch": 3.0, "eval_accuracy": 0.9622317596566523, "eval_f1": 0.9496135003577972, "eval_loss": 0.11506476998329163, "eval_precision": 0.9640835246121029, "eval_recall": 0.9372077296900788, "eval_runtime": 11.448, "eval_samples_per_second": 101.764, "eval_steps_per_second": 6.377, "step": 191 }, { "epoch": 3.14, "grad_norm": 0.3758108913898468, "learning_rate": 0.003412698412698413, "loss": 0.2048, "step": 200 }, { "epoch": 3.29, "grad_norm": 0.3552361726760864, "learning_rate": 0.003333333333333333, "loss": 0.1136, "step": 210 }, { "epoch": 3.45, "grad_norm": 0.3793877959251404, "learning_rate": 0.0032539682539682543, "loss": 0.2048, "step": 220 }, { "epoch": 3.61, "grad_norm": 0.6903932094573975, "learning_rate": 0.0031746031746031746, "loss": 0.2696, "step": 230 }, { "epoch": 3.76, "grad_norm": 0.628604531288147, "learning_rate": 0.0030952380952380953, "loss": 0.1585, "step": 240 }, { "epoch": 3.92, "grad_norm": 0.7250388860702515, "learning_rate": 0.0030158730158730157, "loss": 0.1465, "step": 250 }, { "epoch": 4.0, "eval_accuracy": 0.9725321888412017, "eval_f1": 0.9643166172765768, "eval_loss": 0.07334637641906738, "eval_precision": 0.9653162002256106, "eval_recall": 0.9633286722904824, "eval_runtime": 11.4298, "eval_samples_per_second": 101.927, "eval_steps_per_second": 6.387, "step": 255 }, { "epoch": 4.08, "grad_norm": 0.4553186893463135, "learning_rate": 0.002936507936507937, "loss": 0.1609, "step": 260 }, { "epoch": 4.24, "grad_norm": 0.3900231420993805, "learning_rate": 0.002857142857142857, "loss": 0.1517, "step": 270 }, { "epoch": 4.39, "grad_norm": 0.45594316720962524, "learning_rate": 0.002777777777777778, "loss": 0.1705, "step": 280 }, { "epoch": 4.55, "grad_norm": 0.46610358357429504, "learning_rate": 0.002698412698412698, "loss": 0.1451, "step": 290 }, { "epoch": 4.71, "grad_norm": 0.557108461856842, "learning_rate": 0.0026190476190476194, "loss": 0.1582, "step": 300 }, { "epoch": 4.86, "grad_norm": 0.4486098885536194, "learning_rate": 0.0025396825396825397, "loss": 0.1763, "step": 310 }, { "epoch": 4.99, "eval_accuracy": 0.9725321888412017, "eval_f1": 0.9639271421162228, "eval_loss": 0.07630692422389984, "eval_precision": 0.9702764832336926, "eval_recall": 0.9580085885445321, "eval_runtime": 11.1034, "eval_samples_per_second": 104.923, "eval_steps_per_second": 6.575, "step": 318 }, { "epoch": 5.02, "grad_norm": 0.5927975177764893, "learning_rate": 0.0024603174603174604, "loss": 0.1563, "step": 320 }, { "epoch": 5.18, "grad_norm": 0.5728869438171387, "learning_rate": 0.0023809523809523807, "loss": 0.1666, "step": 330 }, { "epoch": 5.33, "grad_norm": 0.34924712777137756, "learning_rate": 0.0023015873015873015, "loss": 0.1425, "step": 340 }, { "epoch": 5.49, "grad_norm": 0.487810343503952, "learning_rate": 0.0022222222222222222, "loss": 0.1157, "step": 350 }, { "epoch": 5.65, "grad_norm": 0.5470776557922363, "learning_rate": 0.002142857142857143, "loss": 0.1503, "step": 360 }, { "epoch": 5.8, "grad_norm": 0.9323746562004089, "learning_rate": 0.0020634920634920633, "loss": 0.1813, "step": 370 }, { "epoch": 5.96, "grad_norm": 0.4948749542236328, "learning_rate": 0.001984126984126984, "loss": 0.1627, "step": 380 }, { "epoch": 5.99, "eval_accuracy": 0.9570815450643777, "eval_f1": 0.9465564950602603, "eval_loss": 0.10568992793560028, "eval_precision": 0.9315479562643401, "eval_recall": 0.965643911608289, "eval_runtime": 11.3843, "eval_samples_per_second": 102.334, "eval_steps_per_second": 6.412, "step": 382 }, { "epoch": 6.12, "grad_norm": 0.5047426819801331, "learning_rate": 0.0019047619047619048, "loss": 0.1335, "step": 390 }, { "epoch": 6.27, "grad_norm": 0.3176202178001404, "learning_rate": 0.0018253968253968253, "loss": 0.0897, "step": 400 }, { "epoch": 6.43, "grad_norm": 0.42543891072273254, "learning_rate": 0.001746031746031746, "loss": 0.1356, "step": 410 }, { "epoch": 6.59, "grad_norm": 0.35515210032463074, "learning_rate": 0.0016666666666666666, "loss": 0.1324, "step": 420 }, { "epoch": 6.75, "grad_norm": 0.900916337966919, "learning_rate": 0.0015873015873015873, "loss": 0.1435, "step": 430 }, { "epoch": 6.9, "grad_norm": 0.6653035283088684, "learning_rate": 0.0015079365079365078, "loss": 0.1509, "step": 440 }, { "epoch": 7.0, "eval_accuracy": 0.975107296137339, "eval_f1": 0.9680345077825117, "eval_loss": 0.07007680833339691, "eval_precision": 0.9637800176993805, "eval_recall": 0.9725189498135582, "eval_runtime": 12.1771, "eval_samples_per_second": 95.672, "eval_steps_per_second": 5.995, "step": 446 }, { "epoch": 7.06, "grad_norm": 0.31296506524086, "learning_rate": 0.0014285714285714286, "loss": 0.1322, "step": 450 }, { "epoch": 7.22, "grad_norm": 0.6464645862579346, "learning_rate": 0.001349206349206349, "loss": 0.1457, "step": 460 }, { "epoch": 7.37, "grad_norm": 1.0385708808898926, "learning_rate": 0.0012698412698412698, "loss": 0.1278, "step": 470 }, { "epoch": 7.53, "grad_norm": 0.6095046401023865, "learning_rate": 0.0011904761904761904, "loss": 0.1168, "step": 480 }, { "epoch": 7.69, "grad_norm": 0.7724207639694214, "learning_rate": 0.0011111111111111111, "loss": 0.1182, "step": 490 }, { "epoch": 7.84, "grad_norm": 0.4225674569606781, "learning_rate": 0.0010317460317460316, "loss": 0.1325, "step": 500 }, { "epoch": 8.0, "grad_norm": 0.47218218445777893, "learning_rate": 0.0009523809523809524, "loss": 0.1209, "step": 510 }, { "epoch": 8.0, "eval_accuracy": 0.9570815450643777, "eval_f1": 0.9465564950602603, "eval_loss": 0.10465892404317856, "eval_precision": 0.9315479562643401, "eval_recall": 0.965643911608289, "eval_runtime": 11.3686, "eval_samples_per_second": 102.475, "eval_steps_per_second": 6.421, "step": 510 }, { "epoch": 8.16, "grad_norm": 0.296578973531723, "learning_rate": 0.000873015873015873, "loss": 0.1006, "step": 520 }, { "epoch": 8.31, "grad_norm": 0.9204122424125671, "learning_rate": 0.0007936507936507937, "loss": 0.1202, "step": 530 }, { "epoch": 8.47, "grad_norm": 0.5195232629776001, "learning_rate": 0.0007142857142857143, "loss": 0.1052, "step": 540 }, { "epoch": 8.63, "grad_norm": 0.29583191871643066, "learning_rate": 0.0006349206349206349, "loss": 0.1285, "step": 550 }, { "epoch": 8.78, "grad_norm": 0.4851154386997223, "learning_rate": 0.0005555555555555556, "loss": 0.1271, "step": 560 }, { "epoch": 8.94, "grad_norm": 0.47467851638793945, "learning_rate": 0.0004761904761904762, "loss": 0.0961, "step": 570 }, { "epoch": 8.99, "eval_accuracy": 0.9733905579399141, "eval_f1": 0.966177521420591, "eval_loss": 0.07205679267644882, "eval_precision": 0.9577373642293522, "eval_recall": 0.975613576624488, "eval_runtime": 11.2412, "eval_samples_per_second": 103.637, "eval_steps_per_second": 6.494, "step": 573 }, { "epoch": 9.1, "grad_norm": 0.29292458295822144, "learning_rate": 0.0003968253968253968, "loss": 0.096, "step": 580 }, { "epoch": 9.25, "grad_norm": 0.513671338558197, "learning_rate": 0.00031746031746031746, "loss": 0.0871, "step": 590 }, { "epoch": 9.41, "grad_norm": 0.8143641948699951, "learning_rate": 0.0002380952380952381, "loss": 0.1297, "step": 600 }, { "epoch": 9.57, "grad_norm": 0.4731471836566925, "learning_rate": 0.00015873015873015873, "loss": 0.1175, "step": 610 }, { "epoch": 9.73, "grad_norm": 0.30293089151382446, "learning_rate": 7.936507936507937e-05, "loss": 0.1166, "step": 620 }, { "epoch": 9.88, "grad_norm": 0.3364422023296356, "learning_rate": 0.0, "loss": 0.1063, "step": 630 }, { "epoch": 9.88, "eval_accuracy": 0.9622317596566523, "eval_f1": 0.9526072485207102, "eval_loss": 0.08854742348194122, "eval_precision": 0.9398335630995462, "eval_recall": 0.9680642154165903, "eval_runtime": 11.4305, "eval_samples_per_second": 101.92, "eval_steps_per_second": 6.386, "step": 630 }, { "epoch": 9.88, "step": 630, "total_flos": 3.1439051980091965e+18, "train_loss": 0.1791202223490155, "train_runtime": 695.1527, "train_samples_per_second": 58.649, "train_steps_per_second": 0.906 } ], "logging_steps": 10, "max_steps": 630, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 3.1439051980091965e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }