|
{ |
|
"best_metric": 0.975107296137339, |
|
"best_model_checkpoint": "deit-base-patch16-224-finetuned-lora-medmnistv2/checkpoint-446", |
|
"epoch": 9.882352941176471, |
|
"eval_steps": 500, |
|
"global_step": 630, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.1300164461135864, |
|
"learning_rate": 0.004920634920634921, |
|
"loss": 0.4737, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.7213650941848755, |
|
"learning_rate": 0.004841269841269842, |
|
"loss": 0.6061, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 3.0624804496765137, |
|
"learning_rate": 0.0047619047619047615, |
|
"loss": 0.2754, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.509834885597229, |
|
"learning_rate": 0.004682539682539683, |
|
"loss": 0.2462, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 1.2371031045913696, |
|
"learning_rate": 0.004603174603174603, |
|
"loss": 0.205, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 1.2106962203979492, |
|
"learning_rate": 0.004523809523809524, |
|
"loss": 0.2556, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.9184549356223176, |
|
"eval_f1": 0.8822492390220755, |
|
"eval_loss": 0.20187422633171082, |
|
"eval_precision": 0.9446022031823746, |
|
"eval_recall": 0.8469420502475702, |
|
"eval_runtime": 11.2928, |
|
"eval_samples_per_second": 103.163, |
|
"eval_steps_per_second": 6.464, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 2.5079874992370605, |
|
"learning_rate": 0.0044444444444444444, |
|
"loss": 0.282, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 0.6824792623519897, |
|
"learning_rate": 0.004365079365079365, |
|
"loss": 0.2291, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 1.294320821762085, |
|
"learning_rate": 0.004285714285714286, |
|
"loss": 0.208, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"grad_norm": 2.1044976711273193, |
|
"learning_rate": 0.004206349206349207, |
|
"loss": 0.2288, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 0.8730837106704712, |
|
"learning_rate": 0.0041269841269841265, |
|
"loss": 0.3071, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 1.6219590902328491, |
|
"learning_rate": 0.004047619047619048, |
|
"loss": 0.2302, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_accuracy": 0.9613733905579399, |
|
"eval_f1": 0.9513881612438695, |
|
"eval_loss": 0.10982219129800797, |
|
"eval_precision": 0.9396367521367521, |
|
"eval_recall": 0.965355461825295, |
|
"eval_runtime": 11.2742, |
|
"eval_samples_per_second": 103.333, |
|
"eval_steps_per_second": 6.475, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 1.7205462455749512, |
|
"learning_rate": 0.003968253968253968, |
|
"loss": 0.214, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 0.4880439341068268, |
|
"learning_rate": 0.003888888888888889, |
|
"loss": 0.2757, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"grad_norm": 1.5420565605163574, |
|
"learning_rate": 0.0038095238095238095, |
|
"loss": 0.2314, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"grad_norm": 0.872795820236206, |
|
"learning_rate": 0.0037301587301587303, |
|
"loss": 0.2689, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"grad_norm": 1.3670737743377686, |
|
"learning_rate": 0.0036507936507936506, |
|
"loss": 0.1787, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"grad_norm": 0.9697684049606323, |
|
"learning_rate": 0.0035714285714285718, |
|
"loss": 0.1962, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"grad_norm": 0.6434329152107239, |
|
"learning_rate": 0.003492063492063492, |
|
"loss": 0.2258, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9622317596566523, |
|
"eval_f1": 0.9496135003577972, |
|
"eval_loss": 0.11506476998329163, |
|
"eval_precision": 0.9640835246121029, |
|
"eval_recall": 0.9372077296900788, |
|
"eval_runtime": 11.448, |
|
"eval_samples_per_second": 101.764, |
|
"eval_steps_per_second": 6.377, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"grad_norm": 0.3758108913898468, |
|
"learning_rate": 0.003412698412698413, |
|
"loss": 0.2048, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"grad_norm": 0.3552361726760864, |
|
"learning_rate": 0.003333333333333333, |
|
"loss": 0.1136, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"grad_norm": 0.3793877959251404, |
|
"learning_rate": 0.0032539682539682543, |
|
"loss": 0.2048, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"grad_norm": 0.6903932094573975, |
|
"learning_rate": 0.0031746031746031746, |
|
"loss": 0.2696, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"grad_norm": 0.628604531288147, |
|
"learning_rate": 0.0030952380952380953, |
|
"loss": 0.1585, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"grad_norm": 0.7250388860702515, |
|
"learning_rate": 0.0030158730158730157, |
|
"loss": 0.1465, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9725321888412017, |
|
"eval_f1": 0.9643166172765768, |
|
"eval_loss": 0.07334637641906738, |
|
"eval_precision": 0.9653162002256106, |
|
"eval_recall": 0.9633286722904824, |
|
"eval_runtime": 11.4298, |
|
"eval_samples_per_second": 101.927, |
|
"eval_steps_per_second": 6.387, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"grad_norm": 0.4553186893463135, |
|
"learning_rate": 0.002936507936507937, |
|
"loss": 0.1609, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"grad_norm": 0.3900231420993805, |
|
"learning_rate": 0.002857142857142857, |
|
"loss": 0.1517, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"grad_norm": 0.45594316720962524, |
|
"learning_rate": 0.002777777777777778, |
|
"loss": 0.1705, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"grad_norm": 0.46610358357429504, |
|
"learning_rate": 0.002698412698412698, |
|
"loss": 0.1451, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"grad_norm": 0.557108461856842, |
|
"learning_rate": 0.0026190476190476194, |
|
"loss": 0.1582, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"grad_norm": 0.4486098885536194, |
|
"learning_rate": 0.0025396825396825397, |
|
"loss": 0.1763, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_accuracy": 0.9725321888412017, |
|
"eval_f1": 0.9639271421162228, |
|
"eval_loss": 0.07630692422389984, |
|
"eval_precision": 0.9702764832336926, |
|
"eval_recall": 0.9580085885445321, |
|
"eval_runtime": 11.1034, |
|
"eval_samples_per_second": 104.923, |
|
"eval_steps_per_second": 6.575, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"grad_norm": 0.5927975177764893, |
|
"learning_rate": 0.0024603174603174604, |
|
"loss": 0.1563, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 5.18, |
|
"grad_norm": 0.5728869438171387, |
|
"learning_rate": 0.0023809523809523807, |
|
"loss": 0.1666, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"grad_norm": 0.34924712777137756, |
|
"learning_rate": 0.0023015873015873015, |
|
"loss": 0.1425, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"grad_norm": 0.487810343503952, |
|
"learning_rate": 0.0022222222222222222, |
|
"loss": 0.1157, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"grad_norm": 0.5470776557922363, |
|
"learning_rate": 0.002142857142857143, |
|
"loss": 0.1503, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"grad_norm": 0.9323746562004089, |
|
"learning_rate": 0.0020634920634920633, |
|
"loss": 0.1813, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"grad_norm": 0.4948749542236328, |
|
"learning_rate": 0.001984126984126984, |
|
"loss": 0.1627, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"eval_accuracy": 0.9570815450643777, |
|
"eval_f1": 0.9465564950602603, |
|
"eval_loss": 0.10568992793560028, |
|
"eval_precision": 0.9315479562643401, |
|
"eval_recall": 0.965643911608289, |
|
"eval_runtime": 11.3843, |
|
"eval_samples_per_second": 102.334, |
|
"eval_steps_per_second": 6.412, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"grad_norm": 0.5047426819801331, |
|
"learning_rate": 0.0019047619047619048, |
|
"loss": 0.1335, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"grad_norm": 0.3176202178001404, |
|
"learning_rate": 0.0018253968253968253, |
|
"loss": 0.0897, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"grad_norm": 0.42543891072273254, |
|
"learning_rate": 0.001746031746031746, |
|
"loss": 0.1356, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"grad_norm": 0.35515210032463074, |
|
"learning_rate": 0.0016666666666666666, |
|
"loss": 0.1324, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"grad_norm": 0.900916337966919, |
|
"learning_rate": 0.0015873015873015873, |
|
"loss": 0.1435, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"grad_norm": 0.6653035283088684, |
|
"learning_rate": 0.0015079365079365078, |
|
"loss": 0.1509, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.975107296137339, |
|
"eval_f1": 0.9680345077825117, |
|
"eval_loss": 0.07007680833339691, |
|
"eval_precision": 0.9637800176993805, |
|
"eval_recall": 0.9725189498135582, |
|
"eval_runtime": 12.1771, |
|
"eval_samples_per_second": 95.672, |
|
"eval_steps_per_second": 5.995, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"grad_norm": 0.31296506524086, |
|
"learning_rate": 0.0014285714285714286, |
|
"loss": 0.1322, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 7.22, |
|
"grad_norm": 0.6464645862579346, |
|
"learning_rate": 0.001349206349206349, |
|
"loss": 0.1457, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"grad_norm": 1.0385708808898926, |
|
"learning_rate": 0.0012698412698412698, |
|
"loss": 0.1278, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"grad_norm": 0.6095046401023865, |
|
"learning_rate": 0.0011904761904761904, |
|
"loss": 0.1168, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"grad_norm": 0.7724207639694214, |
|
"learning_rate": 0.0011111111111111111, |
|
"loss": 0.1182, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"grad_norm": 0.4225674569606781, |
|
"learning_rate": 0.0010317460317460316, |
|
"loss": 0.1325, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.47218218445777893, |
|
"learning_rate": 0.0009523809523809524, |
|
"loss": 0.1209, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9570815450643777, |
|
"eval_f1": 0.9465564950602603, |
|
"eval_loss": 0.10465892404317856, |
|
"eval_precision": 0.9315479562643401, |
|
"eval_recall": 0.965643911608289, |
|
"eval_runtime": 11.3686, |
|
"eval_samples_per_second": 102.475, |
|
"eval_steps_per_second": 6.421, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 8.16, |
|
"grad_norm": 0.296578973531723, |
|
"learning_rate": 0.000873015873015873, |
|
"loss": 0.1006, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 8.31, |
|
"grad_norm": 0.9204122424125671, |
|
"learning_rate": 0.0007936507936507937, |
|
"loss": 0.1202, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"grad_norm": 0.5195232629776001, |
|
"learning_rate": 0.0007142857142857143, |
|
"loss": 0.1052, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 8.63, |
|
"grad_norm": 0.29583191871643066, |
|
"learning_rate": 0.0006349206349206349, |
|
"loss": 0.1285, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 8.78, |
|
"grad_norm": 0.4851154386997223, |
|
"learning_rate": 0.0005555555555555556, |
|
"loss": 0.1271, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 8.94, |
|
"grad_norm": 0.47467851638793945, |
|
"learning_rate": 0.0004761904761904762, |
|
"loss": 0.0961, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"eval_accuracy": 0.9733905579399141, |
|
"eval_f1": 0.966177521420591, |
|
"eval_loss": 0.07205679267644882, |
|
"eval_precision": 0.9577373642293522, |
|
"eval_recall": 0.975613576624488, |
|
"eval_runtime": 11.2412, |
|
"eval_samples_per_second": 103.637, |
|
"eval_steps_per_second": 6.494, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"grad_norm": 0.29292458295822144, |
|
"learning_rate": 0.0003968253968253968, |
|
"loss": 0.096, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 9.25, |
|
"grad_norm": 0.513671338558197, |
|
"learning_rate": 0.00031746031746031746, |
|
"loss": 0.0871, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 9.41, |
|
"grad_norm": 0.8143641948699951, |
|
"learning_rate": 0.0002380952380952381, |
|
"loss": 0.1297, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"grad_norm": 0.4731471836566925, |
|
"learning_rate": 0.00015873015873015873, |
|
"loss": 0.1175, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 9.73, |
|
"grad_norm": 0.30293089151382446, |
|
"learning_rate": 7.936507936507937e-05, |
|
"loss": 0.1166, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 9.88, |
|
"grad_norm": 0.3364422023296356, |
|
"learning_rate": 0.0, |
|
"loss": 0.1063, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 9.88, |
|
"eval_accuracy": 0.9622317596566523, |
|
"eval_f1": 0.9526072485207102, |
|
"eval_loss": 0.08854742348194122, |
|
"eval_precision": 0.9398335630995462, |
|
"eval_recall": 0.9680642154165903, |
|
"eval_runtime": 11.4305, |
|
"eval_samples_per_second": 101.92, |
|
"eval_steps_per_second": 6.386, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 9.88, |
|
"step": 630, |
|
"total_flos": 3.1439051980091965e+18, |
|
"train_loss": 0.1791202223490155, |
|
"train_runtime": 695.1527, |
|
"train_samples_per_second": 58.649, |
|
"train_steps_per_second": 0.906 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 630, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 3.1439051980091965e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|