{ "best_metric": 0.9347826086956522, "best_model_checkpoint": "vit-base-patch16-224-finetuned-teeth_dataset/checkpoint-138", "epoch": 40.0, "eval_steps": 500, "global_step": 150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.8, "eval_accuracy": 0.008695652173913044, "eval_loss": 4.653251647949219, "eval_runtime": 128.9785, "eval_samples_per_second": 3.566, "eval_steps_per_second": 0.116, "step": 3 }, { "epoch": 1.87, "eval_accuracy": 0.006521739130434782, "eval_loss": 4.584750175476074, "eval_runtime": 9.4797, "eval_samples_per_second": 48.525, "eval_steps_per_second": 1.582, "step": 7 }, { "epoch": 2.67, "grad_norm": 2.967961311340332, "learning_rate": 3.3333333333333335e-05, "loss": 4.6048, "step": 10 }, { "epoch": 2.93, "eval_accuracy": 0.030434782608695653, "eval_loss": 4.460752964019775, "eval_runtime": 9.6304, "eval_samples_per_second": 47.766, "eval_steps_per_second": 1.558, "step": 11 }, { "epoch": 4.0, "eval_accuracy": 0.08478260869565217, "eval_loss": 4.285670757293701, "eval_runtime": 9.7655, "eval_samples_per_second": 47.104, "eval_steps_per_second": 1.536, "step": 15 }, { "epoch": 4.8, "eval_accuracy": 0.11521739130434783, "eval_loss": 4.1469807624816895, "eval_runtime": 9.6545, "eval_samples_per_second": 47.646, "eval_steps_per_second": 1.554, "step": 18 }, { "epoch": 5.33, "grad_norm": 3.3595643043518066, "learning_rate": 4.814814814814815e-05, "loss": 4.2716, "step": 20 }, { "epoch": 5.87, "eval_accuracy": 0.20434782608695654, "eval_loss": 3.964137315750122, "eval_runtime": 9.7181, "eval_samples_per_second": 47.334, "eval_steps_per_second": 1.544, "step": 22 }, { "epoch": 6.93, "eval_accuracy": 0.31521739130434784, "eval_loss": 3.7704641819000244, "eval_runtime": 9.5221, "eval_samples_per_second": 48.309, "eval_steps_per_second": 1.575, "step": 26 }, { "epoch": 8.0, "grad_norm": 4.556402683258057, "learning_rate": 4.4444444444444447e-05, "loss": 3.7404, "step": 30 }, { "epoch": 8.0, "eval_accuracy": 0.41956521739130437, "eval_loss": 3.5808911323547363, "eval_runtime": 9.3828, "eval_samples_per_second": 49.026, "eval_steps_per_second": 1.599, "step": 30 }, { "epoch": 8.8, "eval_accuracy": 0.45217391304347826, "eval_loss": 3.4765827655792236, "eval_runtime": 9.5003, "eval_samples_per_second": 48.419, "eval_steps_per_second": 1.579, "step": 33 }, { "epoch": 9.87, "eval_accuracy": 0.508695652173913, "eval_loss": 3.2980847358703613, "eval_runtime": 9.6725, "eval_samples_per_second": 47.557, "eval_steps_per_second": 1.551, "step": 37 }, { "epoch": 10.67, "grad_norm": 3.8709869384765625, "learning_rate": 4.074074074074074e-05, "loss": 3.1589, "step": 40 }, { "epoch": 10.93, "eval_accuracy": 0.6086956521739131, "eval_loss": 3.1131505966186523, "eval_runtime": 9.5498, "eval_samples_per_second": 48.169, "eval_steps_per_second": 1.571, "step": 41 }, { "epoch": 12.0, "eval_accuracy": 0.6695652173913044, "eval_loss": 2.949446201324463, "eval_runtime": 9.5423, "eval_samples_per_second": 48.206, "eval_steps_per_second": 1.572, "step": 45 }, { "epoch": 12.8, "eval_accuracy": 0.6782608695652174, "eval_loss": 2.836071729660034, "eval_runtime": 9.5423, "eval_samples_per_second": 48.207, "eval_steps_per_second": 1.572, "step": 48 }, { "epoch": 13.33, "grad_norm": 4.395231246948242, "learning_rate": 3.7037037037037037e-05, "loss": 2.6384, "step": 50 }, { "epoch": 13.87, "eval_accuracy": 0.7347826086956522, "eval_loss": 2.6520774364471436, "eval_runtime": 9.5638, "eval_samples_per_second": 48.098, "eval_steps_per_second": 1.568, "step": 52 }, { "epoch": 14.93, "eval_accuracy": 0.758695652173913, "eval_loss": 2.4943137168884277, "eval_runtime": 9.5047, "eval_samples_per_second": 48.397, "eval_steps_per_second": 1.578, "step": 56 }, { "epoch": 16.0, "grad_norm": 4.303729057312012, "learning_rate": 3.3333333333333335e-05, "loss": 2.1342, "step": 60 }, { "epoch": 16.0, "eval_accuracy": 0.7847826086956522, "eval_loss": 2.3421921730041504, "eval_runtime": 9.1584, "eval_samples_per_second": 50.227, "eval_steps_per_second": 1.638, "step": 60 }, { "epoch": 16.8, "eval_accuracy": 0.8108695652173913, "eval_loss": 2.2326693534851074, "eval_runtime": 9.6001, "eval_samples_per_second": 47.916, "eval_steps_per_second": 1.562, "step": 63 }, { "epoch": 17.87, "eval_accuracy": 0.8260869565217391, "eval_loss": 2.083353281021118, "eval_runtime": 9.5492, "eval_samples_per_second": 48.172, "eval_steps_per_second": 1.571, "step": 67 }, { "epoch": 18.67, "grad_norm": 3.915405750274658, "learning_rate": 2.962962962962963e-05, "loss": 1.714, "step": 70 }, { "epoch": 18.93, "eval_accuracy": 0.8565217391304348, "eval_loss": 1.983383059501648, "eval_runtime": 9.4581, "eval_samples_per_second": 48.636, "eval_steps_per_second": 1.586, "step": 71 }, { "epoch": 20.0, "eval_accuracy": 0.8673913043478261, "eval_loss": 1.8931976556777954, "eval_runtime": 9.4385, "eval_samples_per_second": 48.737, "eval_steps_per_second": 1.589, "step": 75 }, { "epoch": 20.8, "eval_accuracy": 0.8586956521739131, "eval_loss": 1.861843228340149, "eval_runtime": 9.2697, "eval_samples_per_second": 49.624, "eval_steps_per_second": 1.618, "step": 78 }, { "epoch": 21.33, "grad_norm": 3.428410768508911, "learning_rate": 2.5925925925925925e-05, "loss": 1.4427, "step": 80 }, { "epoch": 21.87, "eval_accuracy": 0.8891304347826087, "eval_loss": 1.6974326372146606, "eval_runtime": 9.5173, "eval_samples_per_second": 48.333, "eval_steps_per_second": 1.576, "step": 82 }, { "epoch": 22.93, "eval_accuracy": 0.8891304347826087, "eval_loss": 1.6662733554840088, "eval_runtime": 9.3013, "eval_samples_per_second": 49.456, "eval_steps_per_second": 1.613, "step": 86 }, { "epoch": 24.0, "grad_norm": 4.027013778686523, "learning_rate": 2.2222222222222223e-05, "loss": 1.1858, "step": 90 }, { "epoch": 24.0, "eval_accuracy": 0.8847826086956522, "eval_loss": 1.6013683080673218, "eval_runtime": 9.5313, "eval_samples_per_second": 48.262, "eval_steps_per_second": 1.574, "step": 90 }, { "epoch": 24.8, "eval_accuracy": 0.9043478260869565, "eval_loss": 1.5112110376358032, "eval_runtime": 9.5654, "eval_samples_per_second": 48.09, "eval_steps_per_second": 1.568, "step": 93 }, { "epoch": 25.87, "eval_accuracy": 0.9108695652173913, "eval_loss": 1.473188042640686, "eval_runtime": 9.4856, "eval_samples_per_second": 48.495, "eval_steps_per_second": 1.581, "step": 97 }, { "epoch": 26.67, "grad_norm": 3.1277294158935547, "learning_rate": 1.8518518518518518e-05, "loss": 1.0222, "step": 100 }, { "epoch": 26.93, "eval_accuracy": 0.9065217391304348, "eval_loss": 1.430389404296875, "eval_runtime": 9.3124, "eval_samples_per_second": 49.396, "eval_steps_per_second": 1.611, "step": 101 }, { "epoch": 28.0, "eval_accuracy": 0.9130434782608695, "eval_loss": 1.3915013074874878, "eval_runtime": 9.4281, "eval_samples_per_second": 48.79, "eval_steps_per_second": 1.591, "step": 105 }, { "epoch": 28.8, "eval_accuracy": 0.9217391304347826, "eval_loss": 1.3509211540222168, "eval_runtime": 9.5983, "eval_samples_per_second": 47.925, "eval_steps_per_second": 1.563, "step": 108 }, { "epoch": 29.33, "grad_norm": 2.7500855922698975, "learning_rate": 1.4814814814814815e-05, "loss": 0.8306, "step": 110 }, { "epoch": 29.87, "eval_accuracy": 0.9282608695652174, "eval_loss": 1.3053604364395142, "eval_runtime": 9.4585, "eval_samples_per_second": 48.633, "eval_steps_per_second": 1.586, "step": 112 }, { "epoch": 30.93, "eval_accuracy": 0.9260869565217391, "eval_loss": 1.2869884967803955, "eval_runtime": 9.1724, "eval_samples_per_second": 50.151, "eval_steps_per_second": 1.635, "step": 116 }, { "epoch": 32.0, "grad_norm": 2.7954471111297607, "learning_rate": 1.1111111111111112e-05, "loss": 0.7391, "step": 120 }, { "epoch": 32.0, "eval_accuracy": 0.9282608695652174, "eval_loss": 1.264487385749817, "eval_runtime": 9.6279, "eval_samples_per_second": 47.778, "eval_steps_per_second": 1.558, "step": 120 }, { "epoch": 32.8, "eval_accuracy": 0.9260869565217391, "eval_loss": 1.2453793287277222, "eval_runtime": 9.2796, "eval_samples_per_second": 49.571, "eval_steps_per_second": 1.616, "step": 123 }, { "epoch": 33.87, "eval_accuracy": 0.9282608695652174, "eval_loss": 1.2394675016403198, "eval_runtime": 9.5473, "eval_samples_per_second": 48.181, "eval_steps_per_second": 1.571, "step": 127 }, { "epoch": 34.67, "grad_norm": 2.7077043056488037, "learning_rate": 7.4074074074074075e-06, "loss": 0.6971, "step": 130 }, { "epoch": 34.93, "eval_accuracy": 0.9304347826086956, "eval_loss": 1.207598090171814, "eval_runtime": 9.5844, "eval_samples_per_second": 47.994, "eval_steps_per_second": 1.565, "step": 131 }, { "epoch": 36.0, "eval_accuracy": 0.9326086956521739, "eval_loss": 1.1821191310882568, "eval_runtime": 9.4964, "eval_samples_per_second": 48.44, "eval_steps_per_second": 1.58, "step": 135 }, { "epoch": 36.8, "eval_accuracy": 0.9347826086956522, "eval_loss": 1.1735903024673462, "eval_runtime": 9.4168, "eval_samples_per_second": 48.849, "eval_steps_per_second": 1.593, "step": 138 }, { "epoch": 37.33, "grad_norm": 2.4264731407165527, "learning_rate": 3.7037037037037037e-06, "loss": 0.6758, "step": 140 }, { "epoch": 37.87, "eval_accuracy": 0.9326086956521739, "eval_loss": 1.1671130657196045, "eval_runtime": 9.6709, "eval_samples_per_second": 47.565, "eval_steps_per_second": 1.551, "step": 142 }, { "epoch": 38.93, "eval_accuracy": 0.9347826086956522, "eval_loss": 1.1656177043914795, "eval_runtime": 9.2896, "eval_samples_per_second": 49.518, "eval_steps_per_second": 1.615, "step": 146 }, { "epoch": 40.0, "grad_norm": 2.507667303085327, "learning_rate": 0.0, "loss": 0.6445, "step": 150 }, { "epoch": 40.0, "eval_accuracy": 0.9347826086956522, "eval_loss": 1.1649013757705688, "eval_runtime": 9.7408, "eval_samples_per_second": 47.224, "eval_steps_per_second": 1.54, "step": 150 }, { "epoch": 40.0, "step": 150, "total_flos": 1.4270027608424448e+18, "train_loss": 1.9666850312550863, "train_runtime": 1497.2167, "train_samples_per_second": 15.362, "train_steps_per_second": 0.1 } ], "logging_steps": 10, "max_steps": 150, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "total_flos": 1.4270027608424448e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }