{ "best_metric": 0.9141630901287554, "best_model_checkpoint": "beit-base-patch16-224-pt22k-ft22k-finetuned-lora-medmnistv2/checkpoint-63", "epoch": 9.882352941176471, "eval_steps": 500, "global_step": 630, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16, "grad_norm": 2.7611124515533447, "learning_rate": 0.004920634920634921, "loss": 0.6696, "step": 10 }, { "epoch": 0.31, "grad_norm": 3.1516690254211426, "learning_rate": 0.004841269841269842, "loss": 0.5037, "step": 20 }, { "epoch": 0.47, "grad_norm": 1.6632912158966064, "learning_rate": 0.0047619047619047615, "loss": 0.5343, "step": 30 }, { "epoch": 0.63, "grad_norm": 15.660941123962402, "learning_rate": 0.004682539682539683, "loss": 0.7623, "step": 40 }, { "epoch": 0.78, "grad_norm": 1.5720008611679077, "learning_rate": 0.004603174603174603, "loss": 0.5904, "step": 50 }, { "epoch": 0.94, "grad_norm": 3.288884401321411, "learning_rate": 0.004523809523809524, "loss": 0.4775, "step": 60 }, { "epoch": 0.99, "eval_accuracy": 0.9141630901287554, "eval_f1": 0.8903401432261271, "eval_loss": 0.22644232213497162, "eval_precision": 0.884993651288677, "eval_recall": 0.8961752704933064, "eval_runtime": 12.2128, "eval_samples_per_second": 95.391, "eval_steps_per_second": 5.977, "step": 63 }, { "epoch": 1.1, "grad_norm": 2.6683709621429443, "learning_rate": 0.0044603174603174605, "loss": 0.4832, "step": 70 }, { "epoch": 1.25, "grad_norm": 1.2820926904678345, "learning_rate": 0.004380952380952381, "loss": 0.5064, "step": 80 }, { "epoch": 1.41, "grad_norm": 5.157268047332764, "learning_rate": 0.004301587301587302, "loss": 0.4561, "step": 90 }, { "epoch": 1.57, "grad_norm": 0.9834429025650024, "learning_rate": 0.004222222222222223, "loss": 0.3981, "step": 100 }, { "epoch": 1.73, "grad_norm": 7.78388786315918, "learning_rate": 0.0041428571428571434, "loss": 0.5016, "step": 110 }, { "epoch": 1.88, "grad_norm": 12.203431129455566, "learning_rate": 0.004063492063492063, "loss": 0.7117, "step": 120 }, { "epoch": 1.99, "eval_accuracy": 0.7390557939914163, "eval_f1": 0.4249753208292202, "eval_loss": 0.4007842242717743, "eval_precision": 0.36952789699570815, "eval_recall": 0.5, "eval_runtime": 12.6185, "eval_samples_per_second": 92.325, "eval_steps_per_second": 5.785, "step": 127 }, { "epoch": 2.04, "grad_norm": 9.85489273071289, "learning_rate": 0.003992063492063492, "loss": 0.4575, "step": 130 }, { "epoch": 2.2, "grad_norm": 2.926109552383423, "learning_rate": 0.003912698412698413, "loss": 0.4562, "step": 140 }, { "epoch": 2.35, "grad_norm": 0.5027350187301636, "learning_rate": 0.0038333333333333336, "loss": 0.417, "step": 150 }, { "epoch": 2.51, "grad_norm": 0.8259086608886719, "learning_rate": 0.003753968253968254, "loss": 0.3953, "step": 160 }, { "epoch": 2.67, "grad_norm": 2.8119688034057617, "learning_rate": 0.003674603174603175, "loss": 0.4175, "step": 170 }, { "epoch": 2.82, "grad_norm": 2.306551218032837, "learning_rate": 0.0035952380952380954, "loss": 0.4226, "step": 180 }, { "epoch": 2.98, "grad_norm": 6.00565242767334, "learning_rate": 0.003515873015873016, "loss": 0.4115, "step": 190 }, { "epoch": 3.0, "eval_accuracy": 0.8154506437768241, "eval_f1": 0.7957275475643482, "eval_loss": 0.43578293919563293, "eval_precision": 0.7870816708492059, "eval_recall": 0.8645050125313283, "eval_runtime": 12.1326, "eval_samples_per_second": 96.022, "eval_steps_per_second": 6.017, "step": 191 }, { "epoch": 3.14, "grad_norm": 1.8762693405151367, "learning_rate": 0.0034365079365079364, "loss": 0.3876, "step": 200 }, { "epoch": 3.29, "grad_norm": 1.1961586475372314, "learning_rate": 0.003357142857142857, "loss": 0.3232, "step": 210 }, { "epoch": 3.45, "grad_norm": 0.5015142560005188, "learning_rate": 0.003277777777777778, "loss": 0.4061, "step": 220 }, { "epoch": 3.61, "grad_norm": 2.750140428543091, "learning_rate": 0.003198412698412698, "loss": 0.372, "step": 230 }, { "epoch": 3.76, "grad_norm": 2.029989242553711, "learning_rate": 0.003119047619047619, "loss": 0.3498, "step": 240 }, { "epoch": 3.92, "grad_norm": 2.366288661956787, "learning_rate": 0.0030396825396825397, "loss": 0.3631, "step": 250 }, { "epoch": 4.0, "eval_accuracy": 0.8798283261802575, "eval_f1": 0.8517509925684618, "eval_loss": 0.30910125374794006, "eval_precision": 0.8380756825290672, "eval_recall": 0.8708184332783178, "eval_runtime": 12.051, "eval_samples_per_second": 96.673, "eval_steps_per_second": 6.058, "step": 255 }, { "epoch": 4.08, "grad_norm": 0.5986310839653015, "learning_rate": 0.0029603174603174604, "loss": 0.3413, "step": 260 }, { "epoch": 4.24, "grad_norm": 4.78090763092041, "learning_rate": 0.0028809523809523807, "loss": 0.384, "step": 270 }, { "epoch": 4.39, "grad_norm": 3.392075300216675, "learning_rate": 0.0028015873015873015, "loss": 0.3579, "step": 280 }, { "epoch": 4.55, "grad_norm": 1.982884407043457, "learning_rate": 0.0027222222222222222, "loss": 0.3397, "step": 290 }, { "epoch": 4.71, "grad_norm": 4.884500980377197, "learning_rate": 0.002642857142857143, "loss": 0.3609, "step": 300 }, { "epoch": 4.86, "grad_norm": 0.9579557776451111, "learning_rate": 0.0025634920634920633, "loss": 0.3794, "step": 310 }, { "epoch": 4.99, "eval_accuracy": 0.8798283261802575, "eval_f1": 0.8494965377744312, "eval_loss": 0.2801721394062042, "eval_precision": 0.8392582443135881, "eval_recall": 0.8623062992847974, "eval_runtime": 12.197, "eval_samples_per_second": 95.516, "eval_steps_per_second": 5.985, "step": 318 }, { "epoch": 5.02, "grad_norm": 1.1442433595657349, "learning_rate": 0.002484126984126984, "loss": 0.3566, "step": 320 }, { "epoch": 5.18, "grad_norm": 4.931128978729248, "learning_rate": 0.0024047619047619048, "loss": 0.4874, "step": 330 }, { "epoch": 5.33, "grad_norm": 2.474273443222046, "learning_rate": 0.0023253968253968255, "loss": 0.375, "step": 340 }, { "epoch": 5.49, "grad_norm": 2.144062042236328, "learning_rate": 0.0022460317460317463, "loss": 0.368, "step": 350 }, { "epoch": 5.65, "grad_norm": 0.6386366486549377, "learning_rate": 0.002166666666666667, "loss": 0.3755, "step": 360 }, { "epoch": 5.8, "grad_norm": 3.0398051738739014, "learning_rate": 0.0020873015873015873, "loss": 0.3248, "step": 370 }, { "epoch": 5.96, "grad_norm": 2.5777676105499268, "learning_rate": 0.002007936507936508, "loss": 0.3713, "step": 380 }, { "epoch": 5.99, "eval_accuracy": 0.8772532188841202, "eval_f1": 0.8449126373928156, "eval_loss": 0.2805176377296448, "eval_precision": 0.8370533441922019, "eval_recall": 0.8541800385109115, "eval_runtime": 11.9639, "eval_samples_per_second": 97.376, "eval_steps_per_second": 6.102, "step": 382 }, { "epoch": 6.12, "grad_norm": 0.9707914590835571, "learning_rate": 0.0019285714285714288, "loss": 0.3227, "step": 390 }, { "epoch": 6.27, "grad_norm": 1.1869500875473022, "learning_rate": 0.0018492063492063493, "loss": 0.3271, "step": 400 }, { "epoch": 6.43, "grad_norm": 0.6983945369720459, "learning_rate": 0.00176984126984127, "loss": 0.3709, "step": 410 }, { "epoch": 6.59, "grad_norm": 1.19561767578125, "learning_rate": 0.0016904761904761906, "loss": 0.3629, "step": 420 }, { "epoch": 6.75, "grad_norm": 1.7224721908569336, "learning_rate": 0.0016111111111111113, "loss": 0.3182, "step": 430 }, { "epoch": 6.9, "grad_norm": 2.228806972503662, "learning_rate": 0.0015317460317460319, "loss": 0.3953, "step": 440 }, { "epoch": 7.0, "eval_accuracy": 0.8583690987124464, "eval_f1": 0.836744521418762, "eval_loss": 0.3396996855735779, "eval_precision": 0.8185304398119043, "eval_recall": 0.8871569166819487, "eval_runtime": 12.0715, "eval_samples_per_second": 96.508, "eval_steps_per_second": 6.047, "step": 446 }, { "epoch": 7.06, "grad_norm": 3.1751720905303955, "learning_rate": 0.0014523809523809526, "loss": 0.3753, "step": 450 }, { "epoch": 7.22, "grad_norm": 2.4551353454589844, "learning_rate": 0.0013730158730158731, "loss": 0.3061, "step": 460 }, { "epoch": 7.37, "grad_norm": 1.707070231437683, "learning_rate": 0.0012936507936507939, "loss": 0.3525, "step": 470 }, { "epoch": 7.53, "grad_norm": 0.43897438049316406, "learning_rate": 0.0012142857142857144, "loss": 0.3032, "step": 480 }, { "epoch": 7.69, "grad_norm": 2.149365186691284, "learning_rate": 0.001134920634920635, "loss": 0.3031, "step": 490 }, { "epoch": 7.84, "grad_norm": 1.5541889667510986, "learning_rate": 0.0010555555555555557, "loss": 0.3199, "step": 500 }, { "epoch": 8.0, "grad_norm": 0.5194265842437744, "learning_rate": 0.0009761904761904762, "loss": 0.3218, "step": 510 }, { "epoch": 8.0, "eval_accuracy": 0.8669527896995708, "eval_f1": 0.8448168439144914, "eval_loss": 0.3072466254234314, "eval_precision": 0.8257109927719459, "eval_recall": 0.8897720673635308, "eval_runtime": 12.0816, "eval_samples_per_second": 96.428, "eval_steps_per_second": 6.042, "step": 510 }, { "epoch": 8.16, "grad_norm": 0.4812917709350586, "learning_rate": 0.0008968253968253968, "loss": 0.3105, "step": 520 }, { "epoch": 8.31, "grad_norm": 3.871387004852295, "learning_rate": 0.0008174603174603175, "loss": 0.3398, "step": 530 }, { "epoch": 8.47, "grad_norm": 0.3649653196334839, "learning_rate": 0.000746031746031746, "loss": 0.3169, "step": 540 }, { "epoch": 8.63, "grad_norm": 0.4373646676540375, "learning_rate": 0.0006666666666666666, "loss": 0.3019, "step": 550 }, { "epoch": 8.78, "grad_norm": 0.21746236085891724, "learning_rate": 0.0005873015873015873, "loss": 0.3463, "step": 560 }, { "epoch": 8.94, "grad_norm": 1.6411595344543457, "learning_rate": 0.0005079365079365079, "loss": 0.3219, "step": 570 }, { "epoch": 8.99, "eval_accuracy": 0.896137339055794, "eval_f1": 0.870789179160911, "eval_loss": 0.263265997171402, "eval_precision": 0.8582184517497349, "eval_recall": 0.887172198789657, "eval_runtime": 12.1817, "eval_samples_per_second": 95.635, "eval_steps_per_second": 5.993, "step": 573 }, { "epoch": 9.1, "grad_norm": 1.573140025138855, "learning_rate": 0.0004285714285714286, "loss": 0.3078, "step": 580 }, { "epoch": 9.25, "grad_norm": 0.6549800634384155, "learning_rate": 0.00034920634920634924, "loss": 0.2872, "step": 590 }, { "epoch": 9.41, "grad_norm": 0.4417751133441925, "learning_rate": 0.0002698412698412699, "loss": 0.3256, "step": 600 }, { "epoch": 9.57, "grad_norm": 3.714104413986206, "learning_rate": 0.0001904761904761905, "loss": 0.361, "step": 610 }, { "epoch": 9.73, "grad_norm": 0.5447297096252441, "learning_rate": 0.00011111111111111112, "loss": 0.3773, "step": 620 }, { "epoch": 9.88, "grad_norm": 1.347410798072815, "learning_rate": 3.1746031746031745e-05, "loss": 0.3049, "step": 630 }, { "epoch": 9.88, "eval_accuracy": 0.8927038626609443, "eval_f1": 0.8684775000564475, "eval_loss": 0.27391761541366577, "eval_precision": 0.8528169402296, "eval_recall": 0.8912334189131366, "eval_runtime": 12.284, "eval_samples_per_second": 94.839, "eval_steps_per_second": 5.943, "step": 630 }, { "epoch": 9.88, "step": 630, "total_flos": 3.142570654487126e+18, "train_loss": 0.3948629246817695, "train_runtime": 760.2155, "train_samples_per_second": 53.63, "train_steps_per_second": 0.829 } ], "logging_steps": 10, "max_steps": 630, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 3.142570654487126e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }