|
{ |
|
"best_metric": 0.9534883720930233, |
|
"best_model_checkpoint": "beit-base-patch16-224-75-fold4/checkpoint-144", |
|
"epoch": 100.0, |
|
"eval_steps": 500, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7906976744186046, |
|
"eval_loss": 0.5130478143692017, |
|
"eval_runtime": 0.6812, |
|
"eval_samples_per_second": 63.124, |
|
"eval_steps_per_second": 2.936, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7906976744186046, |
|
"eval_loss": 0.4860553443431854, |
|
"eval_runtime": 0.6922, |
|
"eval_samples_per_second": 62.12, |
|
"eval_steps_per_second": 2.889, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7906976744186046, |
|
"eval_loss": 0.4775117039680481, |
|
"eval_runtime": 0.677, |
|
"eval_samples_per_second": 63.512, |
|
"eval_steps_per_second": 2.954, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7906976744186046, |
|
"eval_loss": 0.44188791513442993, |
|
"eval_runtime": 0.6577, |
|
"eval_samples_per_second": 65.382, |
|
"eval_steps_per_second": 3.041, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 7.749820709228516, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.4909, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.36715349555015564, |
|
"eval_runtime": 0.6592, |
|
"eval_samples_per_second": 65.227, |
|
"eval_steps_per_second": 3.034, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.3301140367984772, |
|
"eval_runtime": 0.6452, |
|
"eval_samples_per_second": 66.649, |
|
"eval_steps_per_second": 3.1, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.3130995035171509, |
|
"eval_runtime": 0.6545, |
|
"eval_samples_per_second": 65.699, |
|
"eval_steps_per_second": 3.056, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.4535352885723114, |
|
"eval_runtime": 0.669, |
|
"eval_samples_per_second": 64.28, |
|
"eval_steps_per_second": 2.99, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8372093023255814, |
|
"eval_loss": 0.30879294872283936, |
|
"eval_runtime": 0.6641, |
|
"eval_samples_per_second": 64.754, |
|
"eval_steps_per_second": 3.012, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 7.826290130615234, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3473, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.4452756345272064, |
|
"eval_runtime": 0.6608, |
|
"eval_samples_per_second": 65.074, |
|
"eval_steps_per_second": 3.027, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.42338627576828003, |
|
"eval_runtime": 0.6819, |
|
"eval_samples_per_second": 63.056, |
|
"eval_steps_per_second": 2.933, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.3600829541683197, |
|
"eval_runtime": 0.6913, |
|
"eval_samples_per_second": 62.204, |
|
"eval_steps_per_second": 2.893, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.9069767441860465, |
|
"eval_loss": 0.36579060554504395, |
|
"eval_runtime": 0.6967, |
|
"eval_samples_per_second": 61.723, |
|
"eval_steps_per_second": 2.871, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.30810949206352234, |
|
"eval_runtime": 0.6695, |
|
"eval_samples_per_second": 64.231, |
|
"eval_steps_per_second": 2.988, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 4.6172356605529785, |
|
"learning_rate": 4.722222222222222e-05, |
|
"loss": 0.2903, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.41277334094047546, |
|
"eval_runtime": 0.6574, |
|
"eval_samples_per_second": 65.405, |
|
"eval_steps_per_second": 3.042, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.2554984986782074, |
|
"eval_runtime": 0.6511, |
|
"eval_samples_per_second": 66.044, |
|
"eval_steps_per_second": 3.072, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.3340766429901123, |
|
"eval_runtime": 0.6603, |
|
"eval_samples_per_second": 65.12, |
|
"eval_steps_per_second": 3.029, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.24271325767040253, |
|
"eval_runtime": 0.6653, |
|
"eval_samples_per_second": 64.632, |
|
"eval_steps_per_second": 3.006, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8372093023255814, |
|
"eval_loss": 0.4324667751789093, |
|
"eval_runtime": 0.6702, |
|
"eval_samples_per_second": 64.158, |
|
"eval_steps_per_second": 2.984, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 5.741236209869385, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 0.2673, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.9069767441860465, |
|
"eval_loss": 0.2636508643627167, |
|
"eval_runtime": 0.6682, |
|
"eval_samples_per_second": 64.354, |
|
"eval_steps_per_second": 2.993, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.29191523790359497, |
|
"eval_runtime": 0.6857, |
|
"eval_samples_per_second": 62.706, |
|
"eval_steps_per_second": 2.917, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.31388339400291443, |
|
"eval_runtime": 0.6588, |
|
"eval_samples_per_second": 65.266, |
|
"eval_steps_per_second": 3.036, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.24111181497573853, |
|
"eval_runtime": 0.6531, |
|
"eval_samples_per_second": 65.842, |
|
"eval_steps_per_second": 3.062, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.9069767441860465, |
|
"eval_loss": 0.464458167552948, |
|
"eval_runtime": 0.6616, |
|
"eval_samples_per_second": 64.999, |
|
"eval_steps_per_second": 3.023, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"grad_norm": 14.516473770141602, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.2103, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.508436918258667, |
|
"eval_runtime": 0.6628, |
|
"eval_samples_per_second": 64.877, |
|
"eval_steps_per_second": 3.018, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.9069767441860465, |
|
"eval_loss": 0.23078864812850952, |
|
"eval_runtime": 0.6774, |
|
"eval_samples_per_second": 63.482, |
|
"eval_steps_per_second": 2.953, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.3450285494327545, |
|
"eval_runtime": 0.6606, |
|
"eval_samples_per_second": 65.094, |
|
"eval_steps_per_second": 3.028, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.34442028403282166, |
|
"eval_runtime": 0.6628, |
|
"eval_samples_per_second": 64.873, |
|
"eval_steps_per_second": 3.017, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.9069767441860465, |
|
"eval_loss": 0.2546083629131317, |
|
"eval_runtime": 0.6779, |
|
"eval_samples_per_second": 63.434, |
|
"eval_steps_per_second": 2.95, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"grad_norm": 5.658456802368164, |
|
"learning_rate": 3.888888888888889e-05, |
|
"loss": 0.1673, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.813953488372093, |
|
"eval_loss": 0.9117240905761719, |
|
"eval_runtime": 0.6726, |
|
"eval_samples_per_second": 63.93, |
|
"eval_steps_per_second": 2.973, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.813953488372093, |
|
"eval_loss": 0.8437220454216003, |
|
"eval_runtime": 0.6672, |
|
"eval_samples_per_second": 64.452, |
|
"eval_steps_per_second": 2.998, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.8372093023255814, |
|
"eval_loss": 0.6758245825767517, |
|
"eval_runtime": 0.6742, |
|
"eval_samples_per_second": 63.775, |
|
"eval_steps_per_second": 2.966, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.813953488372093, |
|
"eval_loss": 0.8019335269927979, |
|
"eval_runtime": 0.6637, |
|
"eval_samples_per_second": 64.793, |
|
"eval_steps_per_second": 3.014, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.3364037275314331, |
|
"eval_runtime": 0.6581, |
|
"eval_samples_per_second": 65.339, |
|
"eval_steps_per_second": 3.039, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"grad_norm": 3.4963107109069824, |
|
"learning_rate": 3.611111111111111e-05, |
|
"loss": 0.1677, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.2928450107574463, |
|
"eval_runtime": 0.6586, |
|
"eval_samples_per_second": 65.287, |
|
"eval_steps_per_second": 3.037, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.9069767441860465, |
|
"eval_loss": 0.2546534240245819, |
|
"eval_runtime": 0.6709, |
|
"eval_samples_per_second": 64.096, |
|
"eval_steps_per_second": 2.981, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.2968655228614807, |
|
"eval_runtime": 0.6643, |
|
"eval_samples_per_second": 64.731, |
|
"eval_steps_per_second": 3.011, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.5706415772438049, |
|
"eval_runtime": 0.6641, |
|
"eval_samples_per_second": 64.754, |
|
"eval_steps_per_second": 3.012, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.700643002986908, |
|
"eval_runtime": 0.6643, |
|
"eval_samples_per_second": 64.734, |
|
"eval_steps_per_second": 3.011, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"grad_norm": 7.142909049987793, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.1407, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.43211764097213745, |
|
"eval_runtime": 0.6641, |
|
"eval_samples_per_second": 64.75, |
|
"eval_steps_per_second": 3.012, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.43662115931510925, |
|
"eval_runtime": 0.6788, |
|
"eval_samples_per_second": 63.344, |
|
"eval_steps_per_second": 2.946, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.39564797282218933, |
|
"eval_runtime": 0.6577, |
|
"eval_samples_per_second": 65.376, |
|
"eval_steps_per_second": 3.041, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.8372093023255814, |
|
"eval_loss": 0.22897183895111084, |
|
"eval_runtime": 0.6605, |
|
"eval_samples_per_second": 65.097, |
|
"eval_steps_per_second": 3.028, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.36646950244903564, |
|
"eval_runtime": 0.6649, |
|
"eval_samples_per_second": 64.671, |
|
"eval_steps_per_second": 3.008, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"grad_norm": 5.7567524909973145, |
|
"learning_rate": 3.055555555555556e-05, |
|
"loss": 0.1474, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.4464525580406189, |
|
"eval_runtime": 0.6626, |
|
"eval_samples_per_second": 64.895, |
|
"eval_steps_per_second": 3.018, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.7278981804847717, |
|
"eval_runtime": 0.666, |
|
"eval_samples_per_second": 64.563, |
|
"eval_steps_per_second": 3.003, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.5259287357330322, |
|
"eval_runtime": 0.6797, |
|
"eval_samples_per_second": 63.263, |
|
"eval_steps_per_second": 2.942, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.5832454562187195, |
|
"eval_runtime": 0.6814, |
|
"eval_samples_per_second": 63.101, |
|
"eval_steps_per_second": 2.935, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.73279869556427, |
|
"eval_runtime": 0.6725, |
|
"eval_samples_per_second": 63.937, |
|
"eval_steps_per_second": 2.974, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"grad_norm": 6.381649971008301, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.1344, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.3890153169631958, |
|
"eval_runtime": 0.666, |
|
"eval_samples_per_second": 64.569, |
|
"eval_steps_per_second": 3.003, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.26422181725502014, |
|
"eval_runtime": 0.6656, |
|
"eval_samples_per_second": 64.601, |
|
"eval_steps_per_second": 3.005, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.9069767441860465, |
|
"eval_loss": 0.3710338771343231, |
|
"eval_runtime": 0.6608, |
|
"eval_samples_per_second": 65.077, |
|
"eval_steps_per_second": 3.027, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.9069767441860465, |
|
"eval_loss": 0.47727954387664795, |
|
"eval_runtime": 0.6628, |
|
"eval_samples_per_second": 64.873, |
|
"eval_steps_per_second": 3.017, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.3628208339214325, |
|
"eval_runtime": 0.662, |
|
"eval_samples_per_second": 64.957, |
|
"eval_steps_per_second": 3.021, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"grad_norm": 3.1835520267486572, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.1166, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.9069767441860465, |
|
"eval_loss": 0.43894824385643005, |
|
"eval_runtime": 0.6643, |
|
"eval_samples_per_second": 64.727, |
|
"eval_steps_per_second": 3.011, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.9069767441860465, |
|
"eval_loss": 0.48127347230911255, |
|
"eval_runtime": 0.6668, |
|
"eval_samples_per_second": 64.488, |
|
"eval_steps_per_second": 2.999, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.9069767441860465, |
|
"eval_loss": 0.5327816009521484, |
|
"eval_runtime": 0.6663, |
|
"eval_samples_per_second": 64.537, |
|
"eval_steps_per_second": 3.002, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.9069767441860465, |
|
"eval_loss": 0.5341907739639282, |
|
"eval_runtime": 0.6645, |
|
"eval_samples_per_second": 64.71, |
|
"eval_steps_per_second": 3.01, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.9069767441860465, |
|
"eval_loss": 0.48921194672584534, |
|
"eval_runtime": 0.6577, |
|
"eval_samples_per_second": 65.38, |
|
"eval_steps_per_second": 3.041, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"grad_norm": 3.6190884113311768, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.097, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.9069767441860465, |
|
"eval_loss": 0.5857459902763367, |
|
"eval_runtime": 0.6713, |
|
"eval_samples_per_second": 64.059, |
|
"eval_steps_per_second": 2.979, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.9069767441860465, |
|
"eval_loss": 0.6681368350982666, |
|
"eval_runtime": 0.6655, |
|
"eval_samples_per_second": 64.61, |
|
"eval_steps_per_second": 3.005, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.9069767441860465, |
|
"eval_loss": 0.5946548581123352, |
|
"eval_runtime": 0.6669, |
|
"eval_samples_per_second": 64.482, |
|
"eval_steps_per_second": 2.999, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.9069767441860465, |
|
"eval_loss": 0.47492873668670654, |
|
"eval_runtime": 0.6758, |
|
"eval_samples_per_second": 63.633, |
|
"eval_steps_per_second": 2.96, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.6091283559799194, |
|
"eval_runtime": 0.6685, |
|
"eval_samples_per_second": 64.325, |
|
"eval_steps_per_second": 2.992, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"grad_norm": 4.258938789367676, |
|
"learning_rate": 1.9444444444444445e-05, |
|
"loss": 0.1076, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.9725148677825928, |
|
"eval_runtime": 0.6743, |
|
"eval_samples_per_second": 63.766, |
|
"eval_steps_per_second": 2.966, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.813953488372093, |
|
"eval_loss": 1.1372181177139282, |
|
"eval_runtime": 0.6651, |
|
"eval_samples_per_second": 64.656, |
|
"eval_steps_per_second": 3.007, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.7108578085899353, |
|
"eval_runtime": 0.6618, |
|
"eval_samples_per_second": 64.972, |
|
"eval_steps_per_second": 3.022, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.35486963391304016, |
|
"eval_runtime": 0.66, |
|
"eval_samples_per_second": 65.147, |
|
"eval_steps_per_second": 3.03, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.27090850472450256, |
|
"eval_runtime": 0.6636, |
|
"eval_samples_per_second": 64.795, |
|
"eval_steps_per_second": 3.014, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"grad_norm": 2.5122122764587402, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.0914, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.3316062092781067, |
|
"eval_runtime": 0.6617, |
|
"eval_samples_per_second": 64.984, |
|
"eval_steps_per_second": 3.023, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.317620187997818, |
|
"eval_runtime": 0.6589, |
|
"eval_samples_per_second": 65.263, |
|
"eval_steps_per_second": 3.035, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.9534883720930233, |
|
"eval_loss": 0.25086623430252075, |
|
"eval_runtime": 0.661, |
|
"eval_samples_per_second": 65.051, |
|
"eval_steps_per_second": 3.026, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.9069767441860465, |
|
"eval_loss": 0.22562167048454285, |
|
"eval_runtime": 0.6643, |
|
"eval_samples_per_second": 64.732, |
|
"eval_steps_per_second": 3.011, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.9069767441860465, |
|
"eval_loss": 0.2569940388202667, |
|
"eval_runtime": 0.6668, |
|
"eval_samples_per_second": 64.488, |
|
"eval_steps_per_second": 2.999, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"grad_norm": 4.068800449371338, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 0.0815, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.9534883720930233, |
|
"eval_loss": 0.3081124424934387, |
|
"eval_runtime": 0.6629, |
|
"eval_samples_per_second": 64.862, |
|
"eval_steps_per_second": 3.017, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.4198541045188904, |
|
"eval_runtime": 0.6549, |
|
"eval_samples_per_second": 65.657, |
|
"eval_steps_per_second": 3.054, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.432374507188797, |
|
"eval_runtime": 0.6731, |
|
"eval_samples_per_second": 63.882, |
|
"eval_steps_per_second": 2.971, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.39275258779525757, |
|
"eval_runtime": 0.6597, |
|
"eval_samples_per_second": 65.176, |
|
"eval_steps_per_second": 3.031, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.3700079023838043, |
|
"eval_runtime": 0.6605, |
|
"eval_samples_per_second": 65.106, |
|
"eval_steps_per_second": 3.028, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"grad_norm": 3.1357898712158203, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 0.0878, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.38116565346717834, |
|
"eval_runtime": 0.7043, |
|
"eval_samples_per_second": 61.055, |
|
"eval_steps_per_second": 2.84, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.4299600422382355, |
|
"eval_runtime": 0.6661, |
|
"eval_samples_per_second": 64.555, |
|
"eval_steps_per_second": 3.003, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.4289122223854065, |
|
"eval_runtime": 0.7043, |
|
"eval_samples_per_second": 61.056, |
|
"eval_steps_per_second": 2.84, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.41245317459106445, |
|
"eval_runtime": 0.662, |
|
"eval_samples_per_second": 64.956, |
|
"eval_steps_per_second": 3.021, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.43505820631980896, |
|
"eval_runtime": 0.6683, |
|
"eval_samples_per_second": 64.34, |
|
"eval_steps_per_second": 2.993, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"grad_norm": 3.3373324871063232, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.0725, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.5045635104179382, |
|
"eval_runtime": 0.6522, |
|
"eval_samples_per_second": 65.933, |
|
"eval_steps_per_second": 3.067, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_accuracy": 0.9069767441860465, |
|
"eval_loss": 0.5692218542098999, |
|
"eval_runtime": 0.6625, |
|
"eval_samples_per_second": 64.905, |
|
"eval_steps_per_second": 3.019, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_accuracy": 0.9069767441860465, |
|
"eval_loss": 0.5486466884613037, |
|
"eval_runtime": 0.6685, |
|
"eval_samples_per_second": 64.327, |
|
"eval_steps_per_second": 2.992, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.5309974551200867, |
|
"eval_runtime": 0.6589, |
|
"eval_samples_per_second": 65.259, |
|
"eval_steps_per_second": 3.035, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.4661545753479004, |
|
"eval_runtime": 0.6663, |
|
"eval_samples_per_second": 64.535, |
|
"eval_steps_per_second": 3.002, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"grad_norm": 3.57021427154541, |
|
"learning_rate": 5.555555555555556e-06, |
|
"loss": 0.0944, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.40696418285369873, |
|
"eval_runtime": 0.6586, |
|
"eval_samples_per_second": 65.291, |
|
"eval_steps_per_second": 3.037, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.3767591118812561, |
|
"eval_runtime": 0.6608, |
|
"eval_samples_per_second": 65.074, |
|
"eval_steps_per_second": 3.027, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.3884381651878357, |
|
"eval_runtime": 0.6611, |
|
"eval_samples_per_second": 65.043, |
|
"eval_steps_per_second": 3.025, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.3850724995136261, |
|
"eval_runtime": 0.6655, |
|
"eval_samples_per_second": 64.611, |
|
"eval_steps_per_second": 3.005, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.37587684392929077, |
|
"eval_runtime": 0.6694, |
|
"eval_samples_per_second": 64.238, |
|
"eval_steps_per_second": 2.988, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"grad_norm": 2.757040023803711, |
|
"learning_rate": 2.777777777777778e-06, |
|
"loss": 0.0739, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.3608282804489136, |
|
"eval_runtime": 0.6628, |
|
"eval_samples_per_second": 64.873, |
|
"eval_steps_per_second": 3.017, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.34555718302726746, |
|
"eval_runtime": 0.6719, |
|
"eval_samples_per_second": 63.999, |
|
"eval_steps_per_second": 2.977, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.3360309302806854, |
|
"eval_runtime": 0.6604, |
|
"eval_samples_per_second": 65.116, |
|
"eval_steps_per_second": 3.029, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.33121442794799805, |
|
"eval_runtime": 0.669, |
|
"eval_samples_per_second": 64.28, |
|
"eval_steps_per_second": 2.99, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.3320866823196411, |
|
"eval_runtime": 0.6618, |
|
"eval_samples_per_second": 64.972, |
|
"eval_steps_per_second": 3.022, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"grad_norm": 1.8462013006210327, |
|
"learning_rate": 0.0, |
|
"loss": 0.0612, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.33314311504364014, |
|
"eval_runtime": 0.6673, |
|
"eval_samples_per_second": 64.436, |
|
"eval_steps_per_second": 2.997, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"step": 200, |
|
"total_flos": 1.8667587341684736e+18, |
|
"train_loss": 0.16236644983291626, |
|
"train_runtime": 1253.5555, |
|
"train_samples_per_second": 19.225, |
|
"train_steps_per_second": 0.16 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_accuracy": 0.9534883720930233, |
|
"eval_loss": 0.25086623430252075, |
|
"eval_runtime": 0.7108, |
|
"eval_samples_per_second": 60.499, |
|
"eval_steps_per_second": 2.814, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"total_flos": 1.8667587341684736e+18, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|