|
{ |
|
"best_metric": 0.9347826086956522, |
|
"best_model_checkpoint": "vit-base-patch16-224-finetuned-teeth_dataset/checkpoint-138", |
|
"epoch": 40.0, |
|
"eval_steps": 500, |
|
"global_step": 150, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.8, |
|
"eval_accuracy": 0.008695652173913044, |
|
"eval_loss": 4.653251647949219, |
|
"eval_runtime": 128.9785, |
|
"eval_samples_per_second": 3.566, |
|
"eval_steps_per_second": 0.116, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_accuracy": 0.006521739130434782, |
|
"eval_loss": 4.584750175476074, |
|
"eval_runtime": 9.4797, |
|
"eval_samples_per_second": 48.525, |
|
"eval_steps_per_second": 1.582, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"grad_norm": 2.967961311340332, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 4.6048, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"eval_accuracy": 0.030434782608695653, |
|
"eval_loss": 4.460752964019775, |
|
"eval_runtime": 9.6304, |
|
"eval_samples_per_second": 47.766, |
|
"eval_steps_per_second": 1.558, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.08478260869565217, |
|
"eval_loss": 4.285670757293701, |
|
"eval_runtime": 9.7655, |
|
"eval_samples_per_second": 47.104, |
|
"eval_steps_per_second": 1.536, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"eval_accuracy": 0.11521739130434783, |
|
"eval_loss": 4.1469807624816895, |
|
"eval_runtime": 9.6545, |
|
"eval_samples_per_second": 47.646, |
|
"eval_steps_per_second": 1.554, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"grad_norm": 3.3595643043518066, |
|
"learning_rate": 4.814814814814815e-05, |
|
"loss": 4.2716, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"eval_accuracy": 0.20434782608695654, |
|
"eval_loss": 3.964137315750122, |
|
"eval_runtime": 9.7181, |
|
"eval_samples_per_second": 47.334, |
|
"eval_steps_per_second": 1.544, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 6.93, |
|
"eval_accuracy": 0.31521739130434784, |
|
"eval_loss": 3.7704641819000244, |
|
"eval_runtime": 9.5221, |
|
"eval_samples_per_second": 48.309, |
|
"eval_steps_per_second": 1.575, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 4.556402683258057, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 3.7404, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.41956521739130437, |
|
"eval_loss": 3.5808911323547363, |
|
"eval_runtime": 9.3828, |
|
"eval_samples_per_second": 49.026, |
|
"eval_steps_per_second": 1.599, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"eval_accuracy": 0.45217391304347826, |
|
"eval_loss": 3.4765827655792236, |
|
"eval_runtime": 9.5003, |
|
"eval_samples_per_second": 48.419, |
|
"eval_steps_per_second": 1.579, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 9.87, |
|
"eval_accuracy": 0.508695652173913, |
|
"eval_loss": 3.2980847358703613, |
|
"eval_runtime": 9.6725, |
|
"eval_samples_per_second": 47.557, |
|
"eval_steps_per_second": 1.551, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 10.67, |
|
"grad_norm": 3.8709869384765625, |
|
"learning_rate": 4.074074074074074e-05, |
|
"loss": 3.1589, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 10.93, |
|
"eval_accuracy": 0.6086956521739131, |
|
"eval_loss": 3.1131505966186523, |
|
"eval_runtime": 9.5498, |
|
"eval_samples_per_second": 48.169, |
|
"eval_steps_per_second": 1.571, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.6695652173913044, |
|
"eval_loss": 2.949446201324463, |
|
"eval_runtime": 9.5423, |
|
"eval_samples_per_second": 48.206, |
|
"eval_steps_per_second": 1.572, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"eval_accuracy": 0.6782608695652174, |
|
"eval_loss": 2.836071729660034, |
|
"eval_runtime": 9.5423, |
|
"eval_samples_per_second": 48.207, |
|
"eval_steps_per_second": 1.572, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"grad_norm": 4.395231246948242, |
|
"learning_rate": 3.7037037037037037e-05, |
|
"loss": 2.6384, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 13.87, |
|
"eval_accuracy": 0.7347826086956522, |
|
"eval_loss": 2.6520774364471436, |
|
"eval_runtime": 9.5638, |
|
"eval_samples_per_second": 48.098, |
|
"eval_steps_per_second": 1.568, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 14.93, |
|
"eval_accuracy": 0.758695652173913, |
|
"eval_loss": 2.4943137168884277, |
|
"eval_runtime": 9.5047, |
|
"eval_samples_per_second": 48.397, |
|
"eval_steps_per_second": 1.578, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 4.303729057312012, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 2.1342, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.7847826086956522, |
|
"eval_loss": 2.3421921730041504, |
|
"eval_runtime": 9.1584, |
|
"eval_samples_per_second": 50.227, |
|
"eval_steps_per_second": 1.638, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 16.8, |
|
"eval_accuracy": 0.8108695652173913, |
|
"eval_loss": 2.2326693534851074, |
|
"eval_runtime": 9.6001, |
|
"eval_samples_per_second": 47.916, |
|
"eval_steps_per_second": 1.562, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 17.87, |
|
"eval_accuracy": 0.8260869565217391, |
|
"eval_loss": 2.083353281021118, |
|
"eval_runtime": 9.5492, |
|
"eval_samples_per_second": 48.172, |
|
"eval_steps_per_second": 1.571, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 18.67, |
|
"grad_norm": 3.915405750274658, |
|
"learning_rate": 2.962962962962963e-05, |
|
"loss": 1.714, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 18.93, |
|
"eval_accuracy": 0.8565217391304348, |
|
"eval_loss": 1.983383059501648, |
|
"eval_runtime": 9.4581, |
|
"eval_samples_per_second": 48.636, |
|
"eval_steps_per_second": 1.586, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8673913043478261, |
|
"eval_loss": 1.8931976556777954, |
|
"eval_runtime": 9.4385, |
|
"eval_samples_per_second": 48.737, |
|
"eval_steps_per_second": 1.589, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 20.8, |
|
"eval_accuracy": 0.8586956521739131, |
|
"eval_loss": 1.861843228340149, |
|
"eval_runtime": 9.2697, |
|
"eval_samples_per_second": 49.624, |
|
"eval_steps_per_second": 1.618, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 21.33, |
|
"grad_norm": 3.428410768508911, |
|
"learning_rate": 2.5925925925925925e-05, |
|
"loss": 1.4427, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 21.87, |
|
"eval_accuracy": 0.8891304347826087, |
|
"eval_loss": 1.6974326372146606, |
|
"eval_runtime": 9.5173, |
|
"eval_samples_per_second": 48.333, |
|
"eval_steps_per_second": 1.576, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 22.93, |
|
"eval_accuracy": 0.8891304347826087, |
|
"eval_loss": 1.6662733554840088, |
|
"eval_runtime": 9.3013, |
|
"eval_samples_per_second": 49.456, |
|
"eval_steps_per_second": 1.613, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"grad_norm": 4.027013778686523, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 1.1858, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.8847826086956522, |
|
"eval_loss": 1.6013683080673218, |
|
"eval_runtime": 9.5313, |
|
"eval_samples_per_second": 48.262, |
|
"eval_steps_per_second": 1.574, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 24.8, |
|
"eval_accuracy": 0.9043478260869565, |
|
"eval_loss": 1.5112110376358032, |
|
"eval_runtime": 9.5654, |
|
"eval_samples_per_second": 48.09, |
|
"eval_steps_per_second": 1.568, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 25.87, |
|
"eval_accuracy": 0.9108695652173913, |
|
"eval_loss": 1.473188042640686, |
|
"eval_runtime": 9.4856, |
|
"eval_samples_per_second": 48.495, |
|
"eval_steps_per_second": 1.581, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 26.67, |
|
"grad_norm": 3.1277294158935547, |
|
"learning_rate": 1.8518518518518518e-05, |
|
"loss": 1.0222, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 26.93, |
|
"eval_accuracy": 0.9065217391304348, |
|
"eval_loss": 1.430389404296875, |
|
"eval_runtime": 9.3124, |
|
"eval_samples_per_second": 49.396, |
|
"eval_steps_per_second": 1.611, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.9130434782608695, |
|
"eval_loss": 1.3915013074874878, |
|
"eval_runtime": 9.4281, |
|
"eval_samples_per_second": 48.79, |
|
"eval_steps_per_second": 1.591, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 28.8, |
|
"eval_accuracy": 0.9217391304347826, |
|
"eval_loss": 1.3509211540222168, |
|
"eval_runtime": 9.5983, |
|
"eval_samples_per_second": 47.925, |
|
"eval_steps_per_second": 1.563, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 29.33, |
|
"grad_norm": 2.7500855922698975, |
|
"learning_rate": 1.4814814814814815e-05, |
|
"loss": 0.8306, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 29.87, |
|
"eval_accuracy": 0.9282608695652174, |
|
"eval_loss": 1.3053604364395142, |
|
"eval_runtime": 9.4585, |
|
"eval_samples_per_second": 48.633, |
|
"eval_steps_per_second": 1.586, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 30.93, |
|
"eval_accuracy": 0.9260869565217391, |
|
"eval_loss": 1.2869884967803955, |
|
"eval_runtime": 9.1724, |
|
"eval_samples_per_second": 50.151, |
|
"eval_steps_per_second": 1.635, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"grad_norm": 2.7954471111297607, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 0.7391, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.9282608695652174, |
|
"eval_loss": 1.264487385749817, |
|
"eval_runtime": 9.6279, |
|
"eval_samples_per_second": 47.778, |
|
"eval_steps_per_second": 1.558, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 32.8, |
|
"eval_accuracy": 0.9260869565217391, |
|
"eval_loss": 1.2453793287277222, |
|
"eval_runtime": 9.2796, |
|
"eval_samples_per_second": 49.571, |
|
"eval_steps_per_second": 1.616, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 33.87, |
|
"eval_accuracy": 0.9282608695652174, |
|
"eval_loss": 1.2394675016403198, |
|
"eval_runtime": 9.5473, |
|
"eval_samples_per_second": 48.181, |
|
"eval_steps_per_second": 1.571, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 34.67, |
|
"grad_norm": 2.7077043056488037, |
|
"learning_rate": 7.4074074074074075e-06, |
|
"loss": 0.6971, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 34.93, |
|
"eval_accuracy": 0.9304347826086956, |
|
"eval_loss": 1.207598090171814, |
|
"eval_runtime": 9.5844, |
|
"eval_samples_per_second": 47.994, |
|
"eval_steps_per_second": 1.565, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.9326086956521739, |
|
"eval_loss": 1.1821191310882568, |
|
"eval_runtime": 9.4964, |
|
"eval_samples_per_second": 48.44, |
|
"eval_steps_per_second": 1.58, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 36.8, |
|
"eval_accuracy": 0.9347826086956522, |
|
"eval_loss": 1.1735903024673462, |
|
"eval_runtime": 9.4168, |
|
"eval_samples_per_second": 48.849, |
|
"eval_steps_per_second": 1.593, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 37.33, |
|
"grad_norm": 2.4264731407165527, |
|
"learning_rate": 3.7037037037037037e-06, |
|
"loss": 0.6758, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 37.87, |
|
"eval_accuracy": 0.9326086956521739, |
|
"eval_loss": 1.1671130657196045, |
|
"eval_runtime": 9.6709, |
|
"eval_samples_per_second": 47.565, |
|
"eval_steps_per_second": 1.551, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 38.93, |
|
"eval_accuracy": 0.9347826086956522, |
|
"eval_loss": 1.1656177043914795, |
|
"eval_runtime": 9.2896, |
|
"eval_samples_per_second": 49.518, |
|
"eval_steps_per_second": 1.615, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"grad_norm": 2.507667303085327, |
|
"learning_rate": 0.0, |
|
"loss": 0.6445, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.9347826086956522, |
|
"eval_loss": 1.1649013757705688, |
|
"eval_runtime": 9.7408, |
|
"eval_samples_per_second": 47.224, |
|
"eval_steps_per_second": 1.54, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"step": 150, |
|
"total_flos": 1.4270027608424448e+18, |
|
"train_loss": 1.9666850312550863, |
|
"train_runtime": 1497.2167, |
|
"train_samples_per_second": 15.362, |
|
"train_steps_per_second": 0.1 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 150, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"total_flos": 1.4270027608424448e+18, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|