{ "best_metric": 0.9637404580152672, "best_model_checkpoint": "vit-base-patch16-224-in21k-finetuned-lora-medmnistv2/checkpoint-516", "epoch": 9.898305084745763, "eval_steps": 500, "global_step": 730, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.13559322033898305, "grad_norm": 0.33258745074272156, "learning_rate": 0.004931506849315068, "loss": 0.4697, "step": 10 }, { "epoch": 0.2711864406779661, "grad_norm": 0.4260016679763794, "learning_rate": 0.0048630136986301375, "loss": 0.4209, "step": 20 }, { "epoch": 0.4067796610169492, "grad_norm": 0.335248202085495, "learning_rate": 0.004794520547945206, "loss": 0.3029, "step": 30 }, { "epoch": 0.5423728813559322, "grad_norm": 1.0114984512329102, "learning_rate": 0.004726027397260274, "loss": 0.2765, "step": 40 }, { "epoch": 0.6779661016949152, "grad_norm": 0.7004571557044983, "learning_rate": 0.004657534246575342, "loss": 0.2801, "step": 50 }, { "epoch": 0.8135593220338984, "grad_norm": 0.4206073582172394, "learning_rate": 0.004589041095890411, "loss": 0.2222, "step": 60 }, { "epoch": 0.9491525423728814, "grad_norm": 0.49781563878059387, "learning_rate": 0.00452054794520548, "loss": 0.2447, "step": 70 }, { "epoch": 0.9898305084745763, "eval_accuracy": 0.9351145038167938, "eval_f1": 0.9199611845888442, "eval_loss": 0.15377819538116455, "eval_precision": 0.9013226562633677, "eval_recall": 0.9466247738741311, "eval_runtime": 2.598, "eval_samples_per_second": 201.69, "eval_steps_per_second": 12.702, "step": 73 }, { "epoch": 1.0847457627118644, "grad_norm": 2.9151036739349365, "learning_rate": 0.004452054794520548, "loss": 0.2175, "step": 80 }, { "epoch": 1.2203389830508475, "grad_norm": 0.7062793374061584, "learning_rate": 0.004383561643835616, "loss": 0.2292, "step": 90 }, { "epoch": 1.3559322033898304, "grad_norm": 0.6493266820907593, "learning_rate": 0.004315068493150685, "loss": 0.2363, "step": 100 }, { "epoch": 1.4915254237288136, "grad_norm": 0.487130343914032, "learning_rate": 0.0042465753424657535, "loss": 0.1594, "step": 110 }, { "epoch": 1.6271186440677967, "grad_norm": 1.8056527376174927, "learning_rate": 0.004178082191780822, "loss": 0.2788, "step": 120 }, { "epoch": 1.7627118644067796, "grad_norm": 1.6562355756759644, "learning_rate": 0.00410958904109589, "loss": 0.317, "step": 130 }, { "epoch": 1.8983050847457628, "grad_norm": 3.5319395065307617, "learning_rate": 0.004041095890410959, "loss": 0.3466, "step": 140 }, { "epoch": 1.993220338983051, "eval_accuracy": 0.9122137404580153, "eval_f1": 0.87496628280942, "eval_loss": 0.24509698152542114, "eval_precision": 0.9197011276895048, "eval_recall": 0.846558126249643, "eval_runtime": 2.2021, "eval_samples_per_second": 237.954, "eval_steps_per_second": 14.986, "step": 147 }, { "epoch": 2.0338983050847457, "grad_norm": 1.5860559940338135, "learning_rate": 0.003972602739726027, "loss": 0.4163, "step": 150 }, { "epoch": 2.169491525423729, "grad_norm": 3.2184932231903076, "learning_rate": 0.003904109589041096, "loss": 0.3898, "step": 160 }, { "epoch": 2.305084745762712, "grad_norm": 0.8964293003082275, "learning_rate": 0.0038356164383561643, "loss": 0.5784, "step": 170 }, { "epoch": 2.440677966101695, "grad_norm": 0.6328169703483582, "learning_rate": 0.003767123287671233, "loss": 0.3161, "step": 180 }, { "epoch": 2.576271186440678, "grad_norm": 0.5992427468299866, "learning_rate": 0.0036986301369863013, "loss": 0.2642, "step": 190 }, { "epoch": 2.711864406779661, "grad_norm": 0.802533745765686, "learning_rate": 0.00363013698630137, "loss": 0.3049, "step": 200 }, { "epoch": 2.847457627118644, "grad_norm": 0.41971662640571594, "learning_rate": 0.003561643835616438, "loss": 0.2395, "step": 210 }, { "epoch": 2.983050847457627, "grad_norm": 1.329890489578247, "learning_rate": 0.003493150684931507, "loss": 0.2074, "step": 220 }, { "epoch": 2.9966101694915253, "eval_accuracy": 0.9427480916030534, "eval_f1": 0.9203211483486406, "eval_loss": 0.17114438116550446, "eval_precision": 0.9537988351547674, "eval_recall": 0.8961439588688946, "eval_runtime": 2.5606, "eval_samples_per_second": 204.643, "eval_steps_per_second": 12.888, "step": 221 }, { "epoch": 3.1186440677966103, "grad_norm": 0.5802115797996521, "learning_rate": 0.003424657534246575, "loss": 0.2699, "step": 230 }, { "epoch": 3.2542372881355934, "grad_norm": 0.4673289954662323, "learning_rate": 0.003356164383561644, "loss": 0.2138, "step": 240 }, { "epoch": 3.389830508474576, "grad_norm": 0.6567840576171875, "learning_rate": 0.003287671232876712, "loss": 0.2106, "step": 250 }, { "epoch": 3.5254237288135593, "grad_norm": 0.6320674419403076, "learning_rate": 0.0032191780821917808, "loss": 0.2035, "step": 260 }, { "epoch": 3.6610169491525424, "grad_norm": 0.6084808707237244, "learning_rate": 0.003150684931506849, "loss": 0.1865, "step": 270 }, { "epoch": 3.7966101694915255, "grad_norm": 0.4084516763687134, "learning_rate": 0.003082191780821918, "loss": 0.1982, "step": 280 }, { "epoch": 3.9322033898305087, "grad_norm": 0.9839584231376648, "learning_rate": 0.0030136986301369864, "loss": 0.1928, "step": 290 }, { "epoch": 4.0, "eval_accuracy": 0.9618320610687023, "eval_f1": 0.9503486961795027, "eval_loss": 0.10444469004869461, "eval_precision": 0.9482261076217959, "eval_recall": 0.9525278491859469, "eval_runtime": 2.269, "eval_samples_per_second": 230.934, "eval_steps_per_second": 14.544, "step": 295 }, { "epoch": 4.067796610169491, "grad_norm": 0.9798701405525208, "learning_rate": 0.002945205479452055, "loss": 0.161, "step": 300 }, { "epoch": 4.203389830508475, "grad_norm": 0.36826208233833313, "learning_rate": 0.0028767123287671234, "loss": 0.2181, "step": 310 }, { "epoch": 4.338983050847458, "grad_norm": 0.42575180530548096, "learning_rate": 0.002808219178082192, "loss": 0.1731, "step": 320 }, { "epoch": 4.47457627118644, "grad_norm": 0.413644939661026, "learning_rate": 0.0027397260273972603, "loss": 0.2078, "step": 330 }, { "epoch": 4.610169491525424, "grad_norm": 0.6161473989486694, "learning_rate": 0.002671232876712329, "loss": 0.1798, "step": 340 }, { "epoch": 4.745762711864407, "grad_norm": 0.7980164885520935, "learning_rate": 0.0026027397260273972, "loss": 0.1803, "step": 350 }, { "epoch": 4.88135593220339, "grad_norm": 0.7662364840507507, "learning_rate": 0.002534246575342466, "loss": 0.2043, "step": 360 }, { "epoch": 4.989830508474577, "eval_accuracy": 0.9580152671755725, "eval_f1": 0.9445801203776705, "eval_loss": 0.100668765604496, "eval_precision": 0.9491094147582697, "eval_recall": 0.9402837284585357, "eval_runtime": 2.3077, "eval_samples_per_second": 227.062, "eval_steps_per_second": 14.3, "step": 368 }, { "epoch": 5.016949152542373, "grad_norm": 0.4770575165748596, "learning_rate": 0.002465753424657534, "loss": 0.2144, "step": 370 }, { "epoch": 5.1525423728813555, "grad_norm": 0.5560809969902039, "learning_rate": 0.002397260273972603, "loss": 0.196, "step": 380 }, { "epoch": 5.288135593220339, "grad_norm": 0.3539073169231415, "learning_rate": 0.002328767123287671, "loss": 0.1493, "step": 390 }, { "epoch": 5.423728813559322, "grad_norm": 0.4955459237098694, "learning_rate": 0.00226027397260274, "loss": 0.1548, "step": 400 }, { "epoch": 5.559322033898305, "grad_norm": 0.7003247141838074, "learning_rate": 0.002191780821917808, "loss": 0.1623, "step": 410 }, { "epoch": 5.694915254237288, "grad_norm": 0.37460166215896606, "learning_rate": 0.0021232876712328768, "loss": 0.1964, "step": 420 }, { "epoch": 5.830508474576272, "grad_norm": 0.7236199378967285, "learning_rate": 0.002054794520547945, "loss": 0.2222, "step": 430 }, { "epoch": 5.966101694915254, "grad_norm": 0.47040122747421265, "learning_rate": 0.0019863013698630137, "loss": 0.1717, "step": 440 }, { "epoch": 5.9932203389830505, "eval_accuracy": 0.9618320610687023, "eval_f1": 0.9510417639914043, "eval_loss": 0.09301359206438065, "eval_precision": 0.9431749353009196, "eval_recall": 0.9597829191659526, "eval_runtime": 2.2617, "eval_samples_per_second": 231.684, "eval_steps_per_second": 14.591, "step": 442 }, { "epoch": 6.101694915254237, "grad_norm": 0.38825348019599915, "learning_rate": 0.0019178082191780822, "loss": 0.1433, "step": 450 }, { "epoch": 6.237288135593221, "grad_norm": 0.5254190564155579, "learning_rate": 0.0018493150684931506, "loss": 0.1549, "step": 460 }, { "epoch": 6.372881355932203, "grad_norm": 0.369255930185318, "learning_rate": 0.001780821917808219, "loss": 0.1404, "step": 470 }, { "epoch": 6.508474576271187, "grad_norm": 0.5419684648513794, "learning_rate": 0.0017123287671232876, "loss": 0.1506, "step": 480 }, { "epoch": 6.6440677966101696, "grad_norm": 0.47255194187164307, "learning_rate": 0.001643835616438356, "loss": 0.1852, "step": 490 }, { "epoch": 6.779661016949152, "grad_norm": 0.5880511403083801, "learning_rate": 0.0015753424657534245, "loss": 0.1386, "step": 500 }, { "epoch": 6.915254237288136, "grad_norm": 0.5441922545433044, "learning_rate": 0.0015068493150684932, "loss": 0.1498, "step": 510 }, { "epoch": 6.996610169491525, "eval_accuracy": 0.9637404580152672, "eval_f1": 0.9535961146766969, "eval_loss": 0.08451908081769943, "eval_precision": 0.9448099415204678, "eval_recall": 0.9634866228696563, "eval_runtime": 2.2932, "eval_samples_per_second": 228.506, "eval_steps_per_second": 14.391, "step": 516 }, { "epoch": 7.0508474576271185, "grad_norm": 0.40786656737327576, "learning_rate": 0.0014383561643835617, "loss": 0.1501, "step": 520 }, { "epoch": 7.186440677966102, "grad_norm": 0.5240201950073242, "learning_rate": 0.0013698630136986301, "loss": 0.1619, "step": 530 }, { "epoch": 7.322033898305085, "grad_norm": 0.4185083210468292, "learning_rate": 0.0013013698630136986, "loss": 0.1485, "step": 540 }, { "epoch": 7.4576271186440675, "grad_norm": 0.4574959874153137, "learning_rate": 0.001232876712328767, "loss": 0.1372, "step": 550 }, { "epoch": 7.593220338983051, "grad_norm": 0.4783307909965515, "learning_rate": 0.0011643835616438356, "loss": 0.1501, "step": 560 }, { "epoch": 7.728813559322034, "grad_norm": 0.36722248792648315, "learning_rate": 0.001095890410958904, "loss": 0.1328, "step": 570 }, { "epoch": 7.864406779661017, "grad_norm": 2.2450578212738037, "learning_rate": 0.0010273972602739725, "loss": 0.1672, "step": 580 }, { "epoch": 8.0, "grad_norm": 0.483530730009079, "learning_rate": 0.0009589041095890411, "loss": 0.1531, "step": 590 }, { "epoch": 8.0, "eval_accuracy": 0.933206106870229, "eval_f1": 0.9187542915365363, "eval_loss": 0.166096031665802, "eval_precision": 0.8974050294275013, "eval_recall": 0.9525944968104352, "eval_runtime": 2.4586, "eval_samples_per_second": 213.127, "eval_steps_per_second": 13.422, "step": 590 }, { "epoch": 8.135593220338983, "grad_norm": 0.6294255256652832, "learning_rate": 0.0008904109589041096, "loss": 0.1786, "step": 600 }, { "epoch": 8.271186440677965, "grad_norm": 0.5834006667137146, "learning_rate": 0.000821917808219178, "loss": 0.1564, "step": 610 }, { "epoch": 8.40677966101695, "grad_norm": 1.0836858749389648, "learning_rate": 0.0007534246575342466, "loss": 0.1422, "step": 620 }, { "epoch": 8.542372881355933, "grad_norm": 0.4150519073009491, "learning_rate": 0.0006849315068493151, "loss": 0.1262, "step": 630 }, { "epoch": 8.677966101694915, "grad_norm": 0.4896199703216553, "learning_rate": 0.0006164383561643835, "loss": 0.1024, "step": 640 }, { "epoch": 8.813559322033898, "grad_norm": 0.49235931038856506, "learning_rate": 0.000547945205479452, "loss": 0.1222, "step": 650 }, { "epoch": 8.94915254237288, "grad_norm": 0.7004760503768921, "learning_rate": 0.00047945205479452054, "loss": 0.1451, "step": 660 }, { "epoch": 8.989830508474576, "eval_accuracy": 0.9637404580152672, "eval_f1": 0.9533823109375512, "eval_loss": 0.07597702741622925, "eval_precision": 0.9463719489713148, "eval_recall": 0.9610682662096544, "eval_runtime": 2.256, "eval_samples_per_second": 232.27, "eval_steps_per_second": 14.628, "step": 663 }, { "epoch": 9.084745762711865, "grad_norm": 0.7847811579704285, "learning_rate": 0.000410958904109589, "loss": 0.1416, "step": 670 }, { "epoch": 9.220338983050848, "grad_norm": 0.4217701256275177, "learning_rate": 0.00034246575342465754, "loss": 0.1058, "step": 680 }, { "epoch": 9.35593220338983, "grad_norm": 0.39670172333717346, "learning_rate": 0.000273972602739726, "loss": 0.1227, "step": 690 }, { "epoch": 9.491525423728813, "grad_norm": 0.448311448097229, "learning_rate": 0.0002054794520547945, "loss": 0.1391, "step": 700 }, { "epoch": 9.627118644067796, "grad_norm": 0.2346179038286209, "learning_rate": 0.00014383561643835618, "loss": 0.1029, "step": 710 }, { "epoch": 9.76271186440678, "grad_norm": 0.42317041754722595, "learning_rate": 7.534246575342466e-05, "loss": 0.1341, "step": 720 }, { "epoch": 9.898305084745763, "grad_norm": 2.337646484375, "learning_rate": 1.3698630136986302e-05, "loss": 0.1263, "step": 730 }, { "epoch": 9.898305084745763, "eval_accuracy": 0.9580152671755725, "eval_f1": 0.9466325944854914, "eval_loss": 0.08236818760633469, "eval_precision": 0.9355455710135513, "eval_recall": 0.9596305817385509, "eval_runtime": 2.279, "eval_samples_per_second": 229.924, "eval_steps_per_second": 14.48, "step": 730 }, { "epoch": 9.898305084745763, "step": 730, "total_flos": 3.6369520534486057e+18, "train_loss": 0.20677236253268097, "train_runtime": 441.3046, "train_samples_per_second": 106.684, "train_steps_per_second": 1.654 } ], "logging_steps": 10, "max_steps": 730, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.6369520534486057e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }