|
{ |
|
"best_metric": 0.9534883720930233, |
|
"best_model_checkpoint": "deit-base-distilled-patch16-224-75-fold5/checkpoint-100", |
|
"epoch": 100.0, |
|
"eval_steps": 500, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.3488372093023256, |
|
"eval_loss": 0.8041083812713623, |
|
"eval_runtime": 0.6249, |
|
"eval_samples_per_second": 68.814, |
|
"eval_steps_per_second": 3.201, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7209302325581395, |
|
"eval_loss": 0.6042466163635254, |
|
"eval_runtime": 0.607, |
|
"eval_samples_per_second": 70.843, |
|
"eval_steps_per_second": 3.295, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6976744186046512, |
|
"eval_loss": 0.6582099199295044, |
|
"eval_runtime": 0.5929, |
|
"eval_samples_per_second": 72.531, |
|
"eval_steps_per_second": 3.374, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6976744186046512, |
|
"eval_loss": 0.700655996799469, |
|
"eval_runtime": 0.6048, |
|
"eval_samples_per_second": 71.093, |
|
"eval_steps_per_second": 3.307, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 4.375382900238037, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.6827, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.6976744186046512, |
|
"eval_loss": 0.5652520656585693, |
|
"eval_runtime": 0.6007, |
|
"eval_samples_per_second": 71.587, |
|
"eval_steps_per_second": 3.33, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.813953488372093, |
|
"eval_loss": 0.4344584047794342, |
|
"eval_runtime": 0.6065, |
|
"eval_samples_per_second": 70.898, |
|
"eval_steps_per_second": 3.298, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.813953488372093, |
|
"eval_loss": 0.44925662875175476, |
|
"eval_runtime": 0.6087, |
|
"eval_samples_per_second": 70.643, |
|
"eval_steps_per_second": 3.286, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.813953488372093, |
|
"eval_loss": 0.47643980383872986, |
|
"eval_runtime": 0.6082, |
|
"eval_samples_per_second": 70.705, |
|
"eval_steps_per_second": 3.289, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8372093023255814, |
|
"eval_loss": 0.3636239171028137, |
|
"eval_runtime": 0.6061, |
|
"eval_samples_per_second": 70.943, |
|
"eval_steps_per_second": 3.3, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 3.3277673721313477, |
|
"learning_rate": 5e-05, |
|
"loss": 0.4147, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.2768890857696533, |
|
"eval_runtime": 0.6151, |
|
"eval_samples_per_second": 69.907, |
|
"eval_steps_per_second": 3.252, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8372093023255814, |
|
"eval_loss": 0.35381999611854553, |
|
"eval_runtime": 0.6067, |
|
"eval_samples_per_second": 70.869, |
|
"eval_steps_per_second": 3.296, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.3320469856262207, |
|
"eval_runtime": 0.605, |
|
"eval_samples_per_second": 71.076, |
|
"eval_steps_per_second": 3.306, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8372093023255814, |
|
"eval_loss": 0.2667577266693115, |
|
"eval_runtime": 0.6119, |
|
"eval_samples_per_second": 70.272, |
|
"eval_steps_per_second": 3.268, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.813953488372093, |
|
"eval_loss": 0.28225573897361755, |
|
"eval_runtime": 0.6046, |
|
"eval_samples_per_second": 71.122, |
|
"eval_steps_per_second": 3.308, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 1.5156199932098389, |
|
"learning_rate": 4.722222222222222e-05, |
|
"loss": 0.3405, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8372093023255814, |
|
"eval_loss": 0.48642632365226746, |
|
"eval_runtime": 0.6124, |
|
"eval_samples_per_second": 70.212, |
|
"eval_steps_per_second": 3.266, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8372093023255814, |
|
"eval_loss": 0.44160977005958557, |
|
"eval_runtime": 0.609, |
|
"eval_samples_per_second": 70.608, |
|
"eval_steps_per_second": 3.284, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.813953488372093, |
|
"eval_loss": 0.3585019111633301, |
|
"eval_runtime": 0.5963, |
|
"eval_samples_per_second": 72.111, |
|
"eval_steps_per_second": 3.354, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8372093023255814, |
|
"eval_loss": 0.47688964009284973, |
|
"eval_runtime": 0.6095, |
|
"eval_samples_per_second": 70.545, |
|
"eval_steps_per_second": 3.281, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8372093023255814, |
|
"eval_loss": 0.5918123722076416, |
|
"eval_runtime": 0.6107, |
|
"eval_samples_per_second": 70.412, |
|
"eval_steps_per_second": 3.275, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 2.0539426803588867, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 0.3074, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.4116123914718628, |
|
"eval_runtime": 0.6061, |
|
"eval_samples_per_second": 70.94, |
|
"eval_steps_per_second": 3.3, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.8372093023255814, |
|
"eval_loss": 0.4578164219856262, |
|
"eval_runtime": 0.6053, |
|
"eval_samples_per_second": 71.035, |
|
"eval_steps_per_second": 3.304, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.4294234812259674, |
|
"eval_runtime": 0.6208, |
|
"eval_samples_per_second": 69.27, |
|
"eval_steps_per_second": 3.222, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.8372093023255814, |
|
"eval_loss": 0.540077269077301, |
|
"eval_runtime": 0.6113, |
|
"eval_samples_per_second": 70.345, |
|
"eval_steps_per_second": 3.272, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.20692048966884613, |
|
"eval_runtime": 0.6022, |
|
"eval_samples_per_second": 71.403, |
|
"eval_steps_per_second": 3.321, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"grad_norm": 5.078647136688232, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.2155, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.22156542539596558, |
|
"eval_runtime": 0.6009, |
|
"eval_samples_per_second": 71.556, |
|
"eval_steps_per_second": 3.328, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.38367632031440735, |
|
"eval_runtime": 0.6007, |
|
"eval_samples_per_second": 71.58, |
|
"eval_steps_per_second": 3.329, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.9069767441860465, |
|
"eval_loss": 0.24731607735157013, |
|
"eval_runtime": 0.6066, |
|
"eval_samples_per_second": 70.882, |
|
"eval_steps_per_second": 3.297, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.9069767441860465, |
|
"eval_loss": 0.22330066561698914, |
|
"eval_runtime": 0.612, |
|
"eval_samples_per_second": 70.259, |
|
"eval_steps_per_second": 3.268, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.3489048182964325, |
|
"eval_runtime": 0.6096, |
|
"eval_samples_per_second": 70.543, |
|
"eval_steps_per_second": 3.281, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"grad_norm": 3.3193013668060303, |
|
"learning_rate": 3.888888888888889e-05, |
|
"loss": 0.1874, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.4634832739830017, |
|
"eval_runtime": 0.6073, |
|
"eval_samples_per_second": 70.802, |
|
"eval_steps_per_second": 3.293, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.24913936853408813, |
|
"eval_runtime": 0.6169, |
|
"eval_samples_per_second": 69.704, |
|
"eval_steps_per_second": 3.242, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.2564470171928406, |
|
"eval_runtime": 0.6188, |
|
"eval_samples_per_second": 69.484, |
|
"eval_steps_per_second": 3.232, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.3655398488044739, |
|
"eval_runtime": 0.6099, |
|
"eval_samples_per_second": 70.504, |
|
"eval_steps_per_second": 3.279, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.24981090426445007, |
|
"eval_runtime": 0.6235, |
|
"eval_samples_per_second": 68.962, |
|
"eval_steps_per_second": 3.208, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"grad_norm": 1.9860986471176147, |
|
"learning_rate": 3.611111111111111e-05, |
|
"loss": 0.1324, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.2921779453754425, |
|
"eval_runtime": 0.6126, |
|
"eval_samples_per_second": 70.191, |
|
"eval_steps_per_second": 3.265, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.5368495583534241, |
|
"eval_runtime": 0.6089, |
|
"eval_samples_per_second": 70.616, |
|
"eval_steps_per_second": 3.284, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.5738598108291626, |
|
"eval_runtime": 0.6131, |
|
"eval_samples_per_second": 70.13, |
|
"eval_steps_per_second": 3.262, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.5049097537994385, |
|
"eval_runtime": 0.6178, |
|
"eval_samples_per_second": 69.6, |
|
"eval_steps_per_second": 3.237, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.5902883410453796, |
|
"eval_runtime": 0.6127, |
|
"eval_samples_per_second": 70.176, |
|
"eval_steps_per_second": 3.264, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"grad_norm": 2.2311851978302, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.1222, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.4885583817958832, |
|
"eval_runtime": 0.6261, |
|
"eval_samples_per_second": 68.678, |
|
"eval_steps_per_second": 3.194, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.41742998361587524, |
|
"eval_runtime": 0.6335, |
|
"eval_samples_per_second": 67.877, |
|
"eval_steps_per_second": 3.157, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.542888343334198, |
|
"eval_runtime": 0.5996, |
|
"eval_samples_per_second": 71.718, |
|
"eval_steps_per_second": 3.336, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.6896758675575256, |
|
"eval_runtime": 0.6147, |
|
"eval_samples_per_second": 69.953, |
|
"eval_steps_per_second": 3.254, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.6804940700531006, |
|
"eval_runtime": 0.615, |
|
"eval_samples_per_second": 69.918, |
|
"eval_steps_per_second": 3.252, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"grad_norm": 2.4388444423675537, |
|
"learning_rate": 3.055555555555556e-05, |
|
"loss": 0.1008, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.4073416590690613, |
|
"eval_runtime": 0.6191, |
|
"eval_samples_per_second": 69.46, |
|
"eval_steps_per_second": 3.231, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.4160747230052948, |
|
"eval_runtime": 0.6162, |
|
"eval_samples_per_second": 69.777, |
|
"eval_steps_per_second": 3.245, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.6485046744346619, |
|
"eval_runtime": 0.6132, |
|
"eval_samples_per_second": 70.12, |
|
"eval_steps_per_second": 3.261, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.6746022701263428, |
|
"eval_runtime": 0.6175, |
|
"eval_samples_per_second": 69.639, |
|
"eval_steps_per_second": 3.239, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.44333821535110474, |
|
"eval_runtime": 0.6198, |
|
"eval_samples_per_second": 69.374, |
|
"eval_steps_per_second": 3.227, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"grad_norm": 1.96018385887146, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.117, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.9534883720930233, |
|
"eval_loss": 0.278799444437027, |
|
"eval_runtime": 0.6169, |
|
"eval_samples_per_second": 69.706, |
|
"eval_steps_per_second": 3.242, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.3440650999546051, |
|
"eval_runtime": 0.6161, |
|
"eval_samples_per_second": 69.797, |
|
"eval_steps_per_second": 3.246, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.46627554297447205, |
|
"eval_runtime": 0.6164, |
|
"eval_samples_per_second": 69.765, |
|
"eval_steps_per_second": 3.245, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.9069767441860465, |
|
"eval_loss": 0.3299558460712433, |
|
"eval_runtime": 0.6159, |
|
"eval_samples_per_second": 69.814, |
|
"eval_steps_per_second": 3.247, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.9534883720930233, |
|
"eval_loss": 0.25306156277656555, |
|
"eval_runtime": 0.6221, |
|
"eval_samples_per_second": 69.12, |
|
"eval_steps_per_second": 3.215, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"grad_norm": 1.9721273183822632, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.0879, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.9534883720930233, |
|
"eval_loss": 0.22605039179325104, |
|
"eval_runtime": 0.616, |
|
"eval_samples_per_second": 69.808, |
|
"eval_steps_per_second": 3.247, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.9069767441860465, |
|
"eval_loss": 0.34897878766059875, |
|
"eval_runtime": 0.6198, |
|
"eval_samples_per_second": 69.373, |
|
"eval_steps_per_second": 3.227, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.5059694647789001, |
|
"eval_runtime": 0.6134, |
|
"eval_samples_per_second": 70.104, |
|
"eval_steps_per_second": 3.261, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.4846828877925873, |
|
"eval_runtime": 0.6108, |
|
"eval_samples_per_second": 70.402, |
|
"eval_steps_per_second": 3.274, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.3603507876396179, |
|
"eval_runtime": 0.6084, |
|
"eval_samples_per_second": 70.675, |
|
"eval_steps_per_second": 3.287, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"grad_norm": 5.5369553565979, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.0898, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.3626036047935486, |
|
"eval_runtime": 0.6067, |
|
"eval_samples_per_second": 70.87, |
|
"eval_steps_per_second": 3.296, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.4641226530075073, |
|
"eval_runtime": 0.6132, |
|
"eval_samples_per_second": 70.124, |
|
"eval_steps_per_second": 3.262, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.4605565667152405, |
|
"eval_runtime": 0.6118, |
|
"eval_samples_per_second": 70.282, |
|
"eval_steps_per_second": 3.269, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.34193670749664307, |
|
"eval_runtime": 0.6144, |
|
"eval_samples_per_second": 69.988, |
|
"eval_steps_per_second": 3.255, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.9069767441860465, |
|
"eval_loss": 0.3331243693828583, |
|
"eval_runtime": 0.6254, |
|
"eval_samples_per_second": 68.751, |
|
"eval_steps_per_second": 3.198, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"grad_norm": 2.0372941493988037, |
|
"learning_rate": 1.9444444444444445e-05, |
|
"loss": 0.095, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.3803730010986328, |
|
"eval_runtime": 0.6294, |
|
"eval_samples_per_second": 68.319, |
|
"eval_steps_per_second": 3.178, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.37833139300346375, |
|
"eval_runtime": 0.6275, |
|
"eval_samples_per_second": 68.521, |
|
"eval_steps_per_second": 3.187, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.3523567020893097, |
|
"eval_runtime": 0.6243, |
|
"eval_samples_per_second": 68.873, |
|
"eval_steps_per_second": 3.203, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.423170804977417, |
|
"eval_runtime": 0.675, |
|
"eval_samples_per_second": 63.699, |
|
"eval_steps_per_second": 2.963, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.41566768288612366, |
|
"eval_runtime": 0.6177, |
|
"eval_samples_per_second": 69.612, |
|
"eval_steps_per_second": 3.238, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"grad_norm": 1.394300103187561, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.0735, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.493731826543808, |
|
"eval_runtime": 0.615, |
|
"eval_samples_per_second": 69.919, |
|
"eval_steps_per_second": 3.252, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.4616422951221466, |
|
"eval_runtime": 0.6152, |
|
"eval_samples_per_second": 69.892, |
|
"eval_steps_per_second": 3.251, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.43878328800201416, |
|
"eval_runtime": 0.615, |
|
"eval_samples_per_second": 69.921, |
|
"eval_steps_per_second": 3.252, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.38112929463386536, |
|
"eval_runtime": 0.6155, |
|
"eval_samples_per_second": 69.861, |
|
"eval_steps_per_second": 3.249, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.25220033526420593, |
|
"eval_runtime": 0.61, |
|
"eval_samples_per_second": 70.495, |
|
"eval_steps_per_second": 3.279, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"grad_norm": 1.9808557033538818, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 0.0855, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.9534883720930233, |
|
"eval_loss": 0.16766749322414398, |
|
"eval_runtime": 0.6139, |
|
"eval_samples_per_second": 70.05, |
|
"eval_steps_per_second": 3.258, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.9534883720930233, |
|
"eval_loss": 0.16528721153736115, |
|
"eval_runtime": 0.6142, |
|
"eval_samples_per_second": 70.01, |
|
"eval_steps_per_second": 3.256, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.9534883720930233, |
|
"eval_loss": 0.21727260947227478, |
|
"eval_runtime": 0.6179, |
|
"eval_samples_per_second": 69.594, |
|
"eval_steps_per_second": 3.237, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.3596310019493103, |
|
"eval_runtime": 0.6118, |
|
"eval_samples_per_second": 70.286, |
|
"eval_steps_per_second": 3.269, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.4653131663799286, |
|
"eval_runtime": 0.6097, |
|
"eval_samples_per_second": 70.526, |
|
"eval_steps_per_second": 3.28, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"grad_norm": 2.2111823558807373, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 0.077, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.47392675280570984, |
|
"eval_runtime": 0.6183, |
|
"eval_samples_per_second": 69.54, |
|
"eval_steps_per_second": 3.234, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.360915869474411, |
|
"eval_runtime": 0.6117, |
|
"eval_samples_per_second": 70.296, |
|
"eval_steps_per_second": 3.27, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_accuracy": 0.9534883720930233, |
|
"eval_loss": 0.23335148394107819, |
|
"eval_runtime": 0.6143, |
|
"eval_samples_per_second": 70.004, |
|
"eval_steps_per_second": 3.256, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_accuracy": 0.9534883720930233, |
|
"eval_loss": 0.21253377199172974, |
|
"eval_runtime": 0.6169, |
|
"eval_samples_per_second": 69.706, |
|
"eval_steps_per_second": 3.242, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.9534883720930233, |
|
"eval_loss": 0.2594672441482544, |
|
"eval_runtime": 0.6281, |
|
"eval_samples_per_second": 68.465, |
|
"eval_steps_per_second": 3.184, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"grad_norm": 1.4013921022415161, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.0731, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.37020593881607056, |
|
"eval_runtime": 0.6591, |
|
"eval_samples_per_second": 65.245, |
|
"eval_steps_per_second": 3.035, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.4634316563606262, |
|
"eval_runtime": 0.6372, |
|
"eval_samples_per_second": 67.483, |
|
"eval_steps_per_second": 3.139, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.5253772139549255, |
|
"eval_runtime": 0.6706, |
|
"eval_samples_per_second": 64.119, |
|
"eval_steps_per_second": 2.982, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.5813474655151367, |
|
"eval_runtime": 0.6652, |
|
"eval_samples_per_second": 64.637, |
|
"eval_steps_per_second": 3.006, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.5628194212913513, |
|
"eval_runtime": 0.6372, |
|
"eval_samples_per_second": 67.487, |
|
"eval_steps_per_second": 3.139, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"grad_norm": 1.3443642854690552, |
|
"learning_rate": 5.555555555555556e-06, |
|
"loss": 0.078, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.5479036569595337, |
|
"eval_runtime": 0.6115, |
|
"eval_samples_per_second": 70.317, |
|
"eval_steps_per_second": 3.271, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.5215557217597961, |
|
"eval_runtime": 0.6168, |
|
"eval_samples_per_second": 69.718, |
|
"eval_steps_per_second": 3.243, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.509810745716095, |
|
"eval_runtime": 0.6149, |
|
"eval_samples_per_second": 69.933, |
|
"eval_steps_per_second": 3.253, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.5243856310844421, |
|
"eval_runtime": 0.6268, |
|
"eval_samples_per_second": 68.603, |
|
"eval_steps_per_second": 3.191, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.5471506118774414, |
|
"eval_runtime": 0.622, |
|
"eval_samples_per_second": 69.132, |
|
"eval_steps_per_second": 3.215, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"grad_norm": 1.141045331954956, |
|
"learning_rate": 2.777777777777778e-06, |
|
"loss": 0.079, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.558978259563446, |
|
"eval_runtime": 0.6529, |
|
"eval_samples_per_second": 65.862, |
|
"eval_steps_per_second": 3.063, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.5589279532432556, |
|
"eval_runtime": 0.6301, |
|
"eval_samples_per_second": 68.238, |
|
"eval_steps_per_second": 3.174, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.5455338954925537, |
|
"eval_runtime": 0.6365, |
|
"eval_samples_per_second": 67.555, |
|
"eval_steps_per_second": 3.142, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.5321924686431885, |
|
"eval_runtime": 0.62, |
|
"eval_samples_per_second": 69.358, |
|
"eval_steps_per_second": 3.226, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.5214679837226868, |
|
"eval_runtime": 0.6383, |
|
"eval_samples_per_second": 67.365, |
|
"eval_steps_per_second": 3.133, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"grad_norm": 1.9139795303344727, |
|
"learning_rate": 0.0, |
|
"loss": 0.0477, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.5201581716537476, |
|
"eval_runtime": 0.624, |
|
"eval_samples_per_second": 68.908, |
|
"eval_steps_per_second": 3.205, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"step": 200, |
|
"total_flos": 1.867590382823424e+18, |
|
"train_loss": 0.170336701720953, |
|
"train_runtime": 1121.821, |
|
"train_samples_per_second": 21.483, |
|
"train_steps_per_second": 0.178 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_accuracy": 0.9534883720930233, |
|
"eval_loss": 0.278799444437027, |
|
"eval_runtime": 0.6419, |
|
"eval_samples_per_second": 66.992, |
|
"eval_steps_per_second": 3.116, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.867590382823424e+18, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|