|
{ |
|
"best_metric": 1.375998616218567, |
|
"best_model_checkpoint": "animal_151_types_image_detection/checkpoint-11400", |
|
"epoch": 100.0, |
|
"eval_steps": 500, |
|
"global_step": 11400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.037527593818984545, |
|
"eval_loss": 4.962153434753418, |
|
"eval_runtime": 14.7302, |
|
"eval_samples_per_second": 123.013, |
|
"eval_steps_per_second": 3.87, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.1335540838852097, |
|
"eval_loss": 4.885277271270752, |
|
"eval_runtime": 14.5295, |
|
"eval_samples_per_second": 124.712, |
|
"eval_steps_per_second": 3.923, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.3498896247240618, |
|
"eval_loss": 4.794714450836182, |
|
"eval_runtime": 14.7328, |
|
"eval_samples_per_second": 122.99, |
|
"eval_steps_per_second": 3.869, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.576710816777042, |
|
"eval_loss": 4.702880382537842, |
|
"eval_runtime": 15.1167, |
|
"eval_samples_per_second": 119.868, |
|
"eval_steps_per_second": 3.771, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 4.801762114537446e-06, |
|
"loss": 4.8462, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7400662251655629, |
|
"eval_loss": 4.61031436920166, |
|
"eval_runtime": 14.5797, |
|
"eval_samples_per_second": 124.282, |
|
"eval_steps_per_second": 3.91, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8173289183222958, |
|
"eval_loss": 4.516598701477051, |
|
"eval_runtime": 14.6496, |
|
"eval_samples_per_second": 123.689, |
|
"eval_steps_per_second": 3.891, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8559602649006622, |
|
"eval_loss": 4.426823616027832, |
|
"eval_runtime": 14.6782, |
|
"eval_samples_per_second": 123.448, |
|
"eval_steps_per_second": 3.883, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8846578366445916, |
|
"eval_loss": 4.338470935821533, |
|
"eval_runtime": 14.8516, |
|
"eval_samples_per_second": 122.007, |
|
"eval_steps_per_second": 3.838, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 8.77, |
|
"learning_rate": 4.581497797356828e-06, |
|
"loss": 4.4139, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8979028697571744, |
|
"eval_loss": 4.253594875335693, |
|
"eval_runtime": 14.9472, |
|
"eval_samples_per_second": 121.226, |
|
"eval_steps_per_second": 3.813, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9150110375275938, |
|
"eval_loss": 4.168024063110352, |
|
"eval_runtime": 14.7122, |
|
"eval_samples_per_second": 123.163, |
|
"eval_steps_per_second": 3.874, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.9172185430463576, |
|
"eval_loss": 4.086403846740723, |
|
"eval_runtime": 14.7431, |
|
"eval_samples_per_second": 122.905, |
|
"eval_steps_per_second": 3.866, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.9321192052980133, |
|
"eval_loss": 4.009535789489746, |
|
"eval_runtime": 14.777, |
|
"eval_samples_per_second": 122.623, |
|
"eval_steps_per_second": 3.857, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.9398454746136865, |
|
"eval_loss": 3.932218313217163, |
|
"eval_runtime": 14.842, |
|
"eval_samples_per_second": 122.086, |
|
"eval_steps_per_second": 3.84, |
|
"step": 1482 |
|
}, |
|
{ |
|
"epoch": 13.16, |
|
"learning_rate": 4.361233480176212e-06, |
|
"loss": 4.0075, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.9420529801324503, |
|
"eval_loss": 3.859485149383545, |
|
"eval_runtime": 14.5871, |
|
"eval_samples_per_second": 124.219, |
|
"eval_steps_per_second": 3.908, |
|
"step": 1596 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.9442604856512141, |
|
"eval_loss": 3.7879128456115723, |
|
"eval_runtime": 14.6963, |
|
"eval_samples_per_second": 123.296, |
|
"eval_steps_per_second": 3.879, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.9464679911699779, |
|
"eval_loss": 3.7172069549560547, |
|
"eval_runtime": 14.7356, |
|
"eval_samples_per_second": 122.968, |
|
"eval_steps_per_second": 3.868, |
|
"step": 1824 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.9481236203090507, |
|
"eval_loss": 3.6492252349853516, |
|
"eval_runtime": 20.1559, |
|
"eval_samples_per_second": 89.899, |
|
"eval_steps_per_second": 2.828, |
|
"step": 1938 |
|
}, |
|
{ |
|
"epoch": 17.54, |
|
"learning_rate": 4.140969162995595e-06, |
|
"loss": 3.6551, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.9503311258278145, |
|
"eval_loss": 3.5814554691314697, |
|
"eval_runtime": 17.2491, |
|
"eval_samples_per_second": 105.049, |
|
"eval_steps_per_second": 3.305, |
|
"step": 2052 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.9514348785871964, |
|
"eval_loss": 3.517028331756592, |
|
"eval_runtime": 14.8364, |
|
"eval_samples_per_second": 122.132, |
|
"eval_steps_per_second": 3.842, |
|
"step": 2166 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.9503311258278145, |
|
"eval_loss": 3.4523494243621826, |
|
"eval_runtime": 14.9385, |
|
"eval_samples_per_second": 121.297, |
|
"eval_steps_per_second": 3.816, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.9541942604856513, |
|
"eval_loss": 3.390517234802246, |
|
"eval_runtime": 14.7336, |
|
"eval_samples_per_second": 122.984, |
|
"eval_steps_per_second": 3.869, |
|
"step": 2394 |
|
}, |
|
{ |
|
"epoch": 21.93, |
|
"learning_rate": 3.920704845814978e-06, |
|
"loss": 3.34, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.9558498896247241, |
|
"eval_loss": 3.3292999267578125, |
|
"eval_runtime": 14.7422, |
|
"eval_samples_per_second": 122.913, |
|
"eval_steps_per_second": 3.866, |
|
"step": 2508 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.9552980132450332, |
|
"eval_loss": 3.2692325115203857, |
|
"eval_runtime": 14.916, |
|
"eval_samples_per_second": 121.48, |
|
"eval_steps_per_second": 3.821, |
|
"step": 2622 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.9564017660044151, |
|
"eval_loss": 3.2137107849121094, |
|
"eval_runtime": 14.7629, |
|
"eval_samples_per_second": 122.74, |
|
"eval_steps_per_second": 3.861, |
|
"step": 2736 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.9575055187637969, |
|
"eval_loss": 3.156517267227173, |
|
"eval_runtime": 14.7516, |
|
"eval_samples_per_second": 122.834, |
|
"eval_steps_per_second": 3.864, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.9591611479028698, |
|
"eval_loss": 3.1020307540893555, |
|
"eval_runtime": 14.6198, |
|
"eval_samples_per_second": 123.942, |
|
"eval_steps_per_second": 3.899, |
|
"step": 2964 |
|
}, |
|
{ |
|
"epoch": 26.32, |
|
"learning_rate": 3.7004405286343614e-06, |
|
"loss": 3.062, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.9580573951434879, |
|
"eval_loss": 3.047849178314209, |
|
"eval_runtime": 14.747, |
|
"eval_samples_per_second": 122.872, |
|
"eval_steps_per_second": 3.865, |
|
"step": 3078 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.9597130242825607, |
|
"eval_loss": 2.9951817989349365, |
|
"eval_runtime": 14.6085, |
|
"eval_samples_per_second": 124.038, |
|
"eval_steps_per_second": 3.902, |
|
"step": 3192 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.9608167770419426, |
|
"eval_loss": 2.9445724487304688, |
|
"eval_runtime": 14.744, |
|
"eval_samples_per_second": 122.897, |
|
"eval_steps_per_second": 3.866, |
|
"step": 3306 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.9641280353200883, |
|
"eval_loss": 2.8922934532165527, |
|
"eval_runtime": 14.7257, |
|
"eval_samples_per_second": 123.05, |
|
"eval_steps_per_second": 3.871, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 30.7, |
|
"learning_rate": 3.4801762114537445e-06, |
|
"loss": 2.8103, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.9635761589403974, |
|
"eval_loss": 2.8442115783691406, |
|
"eval_runtime": 14.5837, |
|
"eval_samples_per_second": 124.248, |
|
"eval_steps_per_second": 3.908, |
|
"step": 3534 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.9624724061810155, |
|
"eval_loss": 2.7976019382476807, |
|
"eval_runtime": 14.8078, |
|
"eval_samples_per_second": 122.368, |
|
"eval_steps_per_second": 3.849, |
|
"step": 3648 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.9663355408388521, |
|
"eval_loss": 2.749441623687744, |
|
"eval_runtime": 14.8845, |
|
"eval_samples_per_second": 121.737, |
|
"eval_steps_per_second": 3.829, |
|
"step": 3762 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.9646799116997793, |
|
"eval_loss": 2.702268600463867, |
|
"eval_runtime": 14.6998, |
|
"eval_samples_per_second": 123.267, |
|
"eval_steps_per_second": 3.878, |
|
"step": 3876 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.9646799116997793, |
|
"eval_loss": 2.656121253967285, |
|
"eval_runtime": 14.9899, |
|
"eval_samples_per_second": 120.881, |
|
"eval_steps_per_second": 3.803, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 35.09, |
|
"learning_rate": 3.259911894273128e-06, |
|
"loss": 2.5826, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.9657836644591612, |
|
"eval_loss": 2.613430976867676, |
|
"eval_runtime": 14.4375, |
|
"eval_samples_per_second": 125.507, |
|
"eval_steps_per_second": 3.948, |
|
"step": 4104 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.9663355408388521, |
|
"eval_loss": 2.5684773921966553, |
|
"eval_runtime": 14.8268, |
|
"eval_samples_per_second": 122.212, |
|
"eval_steps_per_second": 3.844, |
|
"step": 4218 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.9685430463576159, |
|
"eval_loss": 2.5256078243255615, |
|
"eval_runtime": 14.6038, |
|
"eval_samples_per_second": 124.078, |
|
"eval_steps_per_second": 3.903, |
|
"step": 4332 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.9696467991169978, |
|
"eval_loss": 2.4859821796417236, |
|
"eval_runtime": 14.7498, |
|
"eval_samples_per_second": 122.849, |
|
"eval_steps_per_second": 3.864, |
|
"step": 4446 |
|
}, |
|
{ |
|
"epoch": 39.47, |
|
"learning_rate": 3.039647577092511e-06, |
|
"loss": 2.3758, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.967439293598234, |
|
"eval_loss": 2.4448373317718506, |
|
"eval_runtime": 14.4892, |
|
"eval_samples_per_second": 125.058, |
|
"eval_steps_per_second": 3.934, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.9707505518763797, |
|
"eval_loss": 2.4062910079956055, |
|
"eval_runtime": 14.5027, |
|
"eval_samples_per_second": 124.943, |
|
"eval_steps_per_second": 3.93, |
|
"step": 4674 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.9696467991169978, |
|
"eval_loss": 2.366445302963257, |
|
"eval_runtime": 14.5967, |
|
"eval_samples_per_second": 124.138, |
|
"eval_steps_per_second": 3.905, |
|
"step": 4788 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.9696467991169978, |
|
"eval_loss": 2.3301877975463867, |
|
"eval_runtime": 15.4794, |
|
"eval_samples_per_second": 117.059, |
|
"eval_steps_per_second": 3.682, |
|
"step": 4902 |
|
}, |
|
{ |
|
"epoch": 43.86, |
|
"learning_rate": 2.819383259911895e-06, |
|
"loss": 2.1901, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.9685430463576159, |
|
"eval_loss": 2.292541980743408, |
|
"eval_runtime": 14.8646, |
|
"eval_samples_per_second": 121.9, |
|
"eval_steps_per_second": 3.835, |
|
"step": 5016 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.9713024282560706, |
|
"eval_loss": 2.258281707763672, |
|
"eval_runtime": 14.6506, |
|
"eval_samples_per_second": 123.681, |
|
"eval_steps_per_second": 3.891, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.9718543046357616, |
|
"eval_loss": 2.2219057083129883, |
|
"eval_runtime": 14.6119, |
|
"eval_samples_per_second": 124.009, |
|
"eval_steps_per_second": 3.901, |
|
"step": 5244 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.9713024282560706, |
|
"eval_loss": 2.1887381076812744, |
|
"eval_runtime": 14.7799, |
|
"eval_samples_per_second": 122.599, |
|
"eval_steps_per_second": 3.857, |
|
"step": 5358 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.9724061810154525, |
|
"eval_loss": 2.15564227104187, |
|
"eval_runtime": 14.5803, |
|
"eval_samples_per_second": 124.277, |
|
"eval_steps_per_second": 3.909, |
|
"step": 5472 |
|
}, |
|
{ |
|
"epoch": 48.25, |
|
"learning_rate": 2.599118942731278e-06, |
|
"loss": 2.0234, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.9718543046357616, |
|
"eval_loss": 2.1227617263793945, |
|
"eval_runtime": 14.678, |
|
"eval_samples_per_second": 123.45, |
|
"eval_steps_per_second": 3.883, |
|
"step": 5586 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.9718543046357616, |
|
"eval_loss": 2.0911219120025635, |
|
"eval_runtime": 14.5634, |
|
"eval_samples_per_second": 124.421, |
|
"eval_steps_per_second": 3.914, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.9718543046357616, |
|
"eval_loss": 2.058539628982544, |
|
"eval_runtime": 14.5546, |
|
"eval_samples_per_second": 124.497, |
|
"eval_steps_per_second": 3.916, |
|
"step": 5814 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.9724061810154525, |
|
"eval_loss": 2.030808448791504, |
|
"eval_runtime": 14.7159, |
|
"eval_samples_per_second": 123.132, |
|
"eval_steps_per_second": 3.873, |
|
"step": 5928 |
|
}, |
|
{ |
|
"epoch": 52.63, |
|
"learning_rate": 2.378854625550661e-06, |
|
"loss": 1.876, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.9729580573951435, |
|
"eval_loss": 1.999698281288147, |
|
"eval_runtime": 14.624, |
|
"eval_samples_per_second": 123.906, |
|
"eval_steps_per_second": 3.898, |
|
"step": 6042 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.9729580573951435, |
|
"eval_loss": 1.9714460372924805, |
|
"eval_runtime": 14.8384, |
|
"eval_samples_per_second": 122.116, |
|
"eval_steps_per_second": 3.841, |
|
"step": 6156 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.9724061810154525, |
|
"eval_loss": 1.944514274597168, |
|
"eval_runtime": 14.5863, |
|
"eval_samples_per_second": 124.226, |
|
"eval_steps_per_second": 3.908, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.9718543046357616, |
|
"eval_loss": 1.9172853231430054, |
|
"eval_runtime": 14.6007, |
|
"eval_samples_per_second": 124.103, |
|
"eval_steps_per_second": 3.904, |
|
"step": 6384 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.9724061810154525, |
|
"eval_loss": 1.8919694423675537, |
|
"eval_runtime": 14.4525, |
|
"eval_samples_per_second": 125.377, |
|
"eval_steps_per_second": 3.944, |
|
"step": 6498 |
|
}, |
|
{ |
|
"epoch": 57.02, |
|
"learning_rate": 2.1585903083700445e-06, |
|
"loss": 1.7451, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.9740618101545254, |
|
"eval_loss": 1.8659210205078125, |
|
"eval_runtime": 14.7567, |
|
"eval_samples_per_second": 122.791, |
|
"eval_steps_per_second": 3.863, |
|
"step": 6612 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.9729580573951435, |
|
"eval_loss": 1.842057704925537, |
|
"eval_runtime": 14.733, |
|
"eval_samples_per_second": 122.989, |
|
"eval_steps_per_second": 3.869, |
|
"step": 6726 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.9746136865342163, |
|
"eval_loss": 1.8180880546569824, |
|
"eval_runtime": 14.5204, |
|
"eval_samples_per_second": 124.79, |
|
"eval_steps_per_second": 3.926, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.9735099337748344, |
|
"eval_loss": 1.7939091920852661, |
|
"eval_runtime": 14.6724, |
|
"eval_samples_per_second": 123.497, |
|
"eval_steps_per_second": 3.885, |
|
"step": 6954 |
|
}, |
|
{ |
|
"epoch": 61.4, |
|
"learning_rate": 1.9383259911894276e-06, |
|
"loss": 1.6286, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.9729580573951435, |
|
"eval_loss": 1.7723444700241089, |
|
"eval_runtime": 14.599, |
|
"eval_samples_per_second": 124.118, |
|
"eval_steps_per_second": 3.904, |
|
"step": 7068 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.9740618101545254, |
|
"eval_loss": 1.7489854097366333, |
|
"eval_runtime": 14.5258, |
|
"eval_samples_per_second": 124.743, |
|
"eval_steps_per_second": 3.924, |
|
"step": 7182 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.9757174392935982, |
|
"eval_loss": 1.7285088300704956, |
|
"eval_runtime": 14.4828, |
|
"eval_samples_per_second": 125.114, |
|
"eval_steps_per_second": 3.936, |
|
"step": 7296 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.9740618101545254, |
|
"eval_loss": 1.7077668905258179, |
|
"eval_runtime": 14.7222, |
|
"eval_samples_per_second": 123.079, |
|
"eval_steps_per_second": 3.872, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 65.79, |
|
"learning_rate": 1.7180616740088108e-06, |
|
"loss": 1.5281, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.9746136865342163, |
|
"eval_loss": 1.6885572671890259, |
|
"eval_runtime": 14.7611, |
|
"eval_samples_per_second": 122.755, |
|
"eval_steps_per_second": 3.862, |
|
"step": 7524 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.9740618101545254, |
|
"eval_loss": 1.6697474718093872, |
|
"eval_runtime": 14.5138, |
|
"eval_samples_per_second": 124.846, |
|
"eval_steps_per_second": 3.927, |
|
"step": 7638 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.9746136865342163, |
|
"eval_loss": 1.6512248516082764, |
|
"eval_runtime": 14.4806, |
|
"eval_samples_per_second": 125.133, |
|
"eval_steps_per_second": 3.936, |
|
"step": 7752 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.9735099337748344, |
|
"eval_loss": 1.6349101066589355, |
|
"eval_runtime": 14.6474, |
|
"eval_samples_per_second": 123.708, |
|
"eval_steps_per_second": 3.891, |
|
"step": 7866 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.9729580573951435, |
|
"eval_loss": 1.6167147159576416, |
|
"eval_runtime": 14.5436, |
|
"eval_samples_per_second": 124.591, |
|
"eval_steps_per_second": 3.919, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 70.18, |
|
"learning_rate": 1.497797356828194e-06, |
|
"loss": 1.4403, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.9729580573951435, |
|
"eval_loss": 1.600691556930542, |
|
"eval_runtime": 14.4531, |
|
"eval_samples_per_second": 125.371, |
|
"eval_steps_per_second": 3.944, |
|
"step": 8094 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.9746136865342163, |
|
"eval_loss": 1.5854783058166504, |
|
"eval_runtime": 14.6051, |
|
"eval_samples_per_second": 124.067, |
|
"eval_steps_per_second": 3.903, |
|
"step": 8208 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.9751655629139073, |
|
"eval_loss": 1.5697646141052246, |
|
"eval_runtime": 14.7222, |
|
"eval_samples_per_second": 123.079, |
|
"eval_steps_per_second": 3.872, |
|
"step": 8322 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.9735099337748344, |
|
"eval_loss": 1.5558655261993408, |
|
"eval_runtime": 14.7278, |
|
"eval_samples_per_second": 123.033, |
|
"eval_steps_per_second": 3.87, |
|
"step": 8436 |
|
}, |
|
{ |
|
"epoch": 74.56, |
|
"learning_rate": 1.2775330396475772e-06, |
|
"loss": 1.3684, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.9746136865342163, |
|
"eval_loss": 1.5405701398849487, |
|
"eval_runtime": 14.4937, |
|
"eval_samples_per_second": 125.02, |
|
"eval_steps_per_second": 3.933, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.9735099337748344, |
|
"eval_loss": 1.52810537815094, |
|
"eval_runtime": 14.5545, |
|
"eval_samples_per_second": 124.497, |
|
"eval_steps_per_second": 3.916, |
|
"step": 8664 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.9735099337748344, |
|
"eval_loss": 1.5153011083602905, |
|
"eval_runtime": 14.6637, |
|
"eval_samples_per_second": 123.57, |
|
"eval_steps_per_second": 3.887, |
|
"step": 8778 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.9746136865342163, |
|
"eval_loss": 1.5028570890426636, |
|
"eval_runtime": 14.5939, |
|
"eval_samples_per_second": 124.162, |
|
"eval_steps_per_second": 3.906, |
|
"step": 8892 |
|
}, |
|
{ |
|
"epoch": 78.95, |
|
"learning_rate": 1.0572687224669604e-06, |
|
"loss": 1.3041, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.9740618101545254, |
|
"eval_loss": 1.4909206628799438, |
|
"eval_runtime": 14.5501, |
|
"eval_samples_per_second": 124.535, |
|
"eval_steps_per_second": 3.918, |
|
"step": 9006 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.9740618101545254, |
|
"eval_loss": 1.4797635078430176, |
|
"eval_runtime": 14.5945, |
|
"eval_samples_per_second": 124.157, |
|
"eval_steps_per_second": 3.906, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_accuracy": 0.9751655629139073, |
|
"eval_loss": 1.469772219657898, |
|
"eval_runtime": 14.868, |
|
"eval_samples_per_second": 121.873, |
|
"eval_steps_per_second": 3.834, |
|
"step": 9234 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_accuracy": 0.9740618101545254, |
|
"eval_loss": 1.4597550630569458, |
|
"eval_runtime": 14.5863, |
|
"eval_samples_per_second": 124.226, |
|
"eval_steps_per_second": 3.908, |
|
"step": 9348 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_accuracy": 0.9746136865342163, |
|
"eval_loss": 1.4507455825805664, |
|
"eval_runtime": 14.5654, |
|
"eval_samples_per_second": 124.404, |
|
"eval_steps_per_second": 3.913, |
|
"step": 9462 |
|
}, |
|
{ |
|
"epoch": 83.33, |
|
"learning_rate": 8.370044052863437e-07, |
|
"loss": 1.2554, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.9735099337748344, |
|
"eval_loss": 1.4420080184936523, |
|
"eval_runtime": 14.7476, |
|
"eval_samples_per_second": 122.868, |
|
"eval_steps_per_second": 3.865, |
|
"step": 9576 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_accuracy": 0.9740618101545254, |
|
"eval_loss": 1.433323860168457, |
|
"eval_runtime": 14.5881, |
|
"eval_samples_per_second": 124.211, |
|
"eval_steps_per_second": 3.907, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_accuracy": 0.9740618101545254, |
|
"eval_loss": 1.4266729354858398, |
|
"eval_runtime": 14.6204, |
|
"eval_samples_per_second": 123.937, |
|
"eval_steps_per_second": 3.899, |
|
"step": 9804 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_accuracy": 0.9746136865342163, |
|
"eval_loss": 1.419144630432129, |
|
"eval_runtime": 14.7023, |
|
"eval_samples_per_second": 123.246, |
|
"eval_steps_per_second": 3.877, |
|
"step": 9918 |
|
}, |
|
{ |
|
"epoch": 87.72, |
|
"learning_rate": 6.16740088105727e-07, |
|
"loss": 1.2166, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.9746136865342163, |
|
"eval_loss": 1.41357421875, |
|
"eval_runtime": 14.7197, |
|
"eval_samples_per_second": 123.101, |
|
"eval_steps_per_second": 3.872, |
|
"step": 10032 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_accuracy": 0.9751655629139073, |
|
"eval_loss": 1.4069515466690063, |
|
"eval_runtime": 14.5913, |
|
"eval_samples_per_second": 124.184, |
|
"eval_steps_per_second": 3.906, |
|
"step": 10146 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_accuracy": 0.9751655629139073, |
|
"eval_loss": 1.4011104106903076, |
|
"eval_runtime": 14.4858, |
|
"eval_samples_per_second": 125.088, |
|
"eval_steps_per_second": 3.935, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"eval_accuracy": 0.9751655629139073, |
|
"eval_loss": 1.396867036819458, |
|
"eval_runtime": 14.4869, |
|
"eval_samples_per_second": 125.079, |
|
"eval_steps_per_second": 3.935, |
|
"step": 10374 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.9751655629139073, |
|
"eval_loss": 1.3927924633026123, |
|
"eval_runtime": 14.9674, |
|
"eval_samples_per_second": 121.063, |
|
"eval_steps_per_second": 3.808, |
|
"step": 10488 |
|
}, |
|
{ |
|
"epoch": 92.11, |
|
"learning_rate": 3.964757709251102e-07, |
|
"loss": 1.1885, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"eval_accuracy": 0.9740618101545254, |
|
"eval_loss": 1.3884556293487549, |
|
"eval_runtime": 15.051, |
|
"eval_samples_per_second": 120.39, |
|
"eval_steps_per_second": 3.787, |
|
"step": 10602 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_accuracy": 0.9740618101545254, |
|
"eval_loss": 1.3855311870574951, |
|
"eval_runtime": 14.5082, |
|
"eval_samples_per_second": 124.895, |
|
"eval_steps_per_second": 3.929, |
|
"step": 10716 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"eval_accuracy": 0.9746136865342163, |
|
"eval_loss": 1.3824039697647095, |
|
"eval_runtime": 14.8975, |
|
"eval_samples_per_second": 121.631, |
|
"eval_steps_per_second": 3.826, |
|
"step": 10830 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_accuracy": 0.9746136865342163, |
|
"eval_loss": 1.3800771236419678, |
|
"eval_runtime": 14.7571, |
|
"eval_samples_per_second": 122.788, |
|
"eval_steps_per_second": 3.863, |
|
"step": 10944 |
|
}, |
|
{ |
|
"epoch": 96.49, |
|
"learning_rate": 1.7621145374449343e-07, |
|
"loss": 1.1703, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"eval_accuracy": 0.9746136865342163, |
|
"eval_loss": 1.378131628036499, |
|
"eval_runtime": 14.7894, |
|
"eval_samples_per_second": 122.52, |
|
"eval_steps_per_second": 3.854, |
|
"step": 11058 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_accuracy": 0.9746136865342163, |
|
"eval_loss": 1.3769623041152954, |
|
"eval_runtime": 14.5124, |
|
"eval_samples_per_second": 124.858, |
|
"eval_steps_per_second": 3.928, |
|
"step": 11172 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"eval_accuracy": 0.9746136865342163, |
|
"eval_loss": 1.3762928247451782, |
|
"eval_runtime": 14.7379, |
|
"eval_samples_per_second": 122.948, |
|
"eval_steps_per_second": 3.868, |
|
"step": 11286 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_accuracy": 0.9746136865342163, |
|
"eval_loss": 1.375998616218567, |
|
"eval_runtime": 15.1012, |
|
"eval_samples_per_second": 119.991, |
|
"eval_steps_per_second": 3.775, |
|
"step": 11400 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 11400, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"total_flos": 5.624120064761856e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|