|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 39.0, |
|
"global_step": 4134, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.3832186408159307e-05, |
|
"loss": 2.0685, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6720276077614272, |
|
"eval_loss": 1.7388017177581787, |
|
"eval_runtime": 36.8297, |
|
"eval_samples_per_second": 48.059, |
|
"eval_steps_per_second": 0.163, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.5888124272106204e-05, |
|
"loss": 1.7284, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6931985254177139, |
|
"eval_loss": 1.5560609102249146, |
|
"eval_runtime": 37.6763, |
|
"eval_samples_per_second": 46.979, |
|
"eval_steps_per_second": 0.159, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.7090770826327895e-05, |
|
"loss": 1.5997, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7099605218744038, |
|
"eval_loss": 1.4386627674102783, |
|
"eval_runtime": 36.094, |
|
"eval_samples_per_second": 49.039, |
|
"eval_steps_per_second": 0.166, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 1.7944062136053104e-05, |
|
"loss": 1.5195, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7235852020285128, |
|
"eval_loss": 1.3606762886047363, |
|
"eval_runtime": 37.6742, |
|
"eval_samples_per_second": 46.982, |
|
"eval_steps_per_second": 0.159, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 1.860592629580032e-05, |
|
"loss": 1.4706, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7306072855931103, |
|
"eval_loss": 1.3053652048110962, |
|
"eval_runtime": 36.8913, |
|
"eval_samples_per_second": 47.979, |
|
"eval_steps_per_second": 0.163, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 1.9146708690274792e-05, |
|
"loss": 1.4153, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7296665866066945, |
|
"eval_loss": 1.3213350772857666, |
|
"eval_runtime": 37.6502, |
|
"eval_samples_per_second": 47.012, |
|
"eval_steps_per_second": 0.159, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 1.9603933689955228e-05, |
|
"loss": 1.3838, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7412067658165602, |
|
"eval_loss": 1.2423616647720337, |
|
"eval_runtime": 36.7828, |
|
"eval_samples_per_second": 48.12, |
|
"eval_steps_per_second": 0.163, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.3512, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7385993664885682, |
|
"eval_loss": 1.2403146028518677, |
|
"eval_runtime": 37.6141, |
|
"eval_samples_per_second": 47.057, |
|
"eval_steps_per_second": 0.16, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.3188, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7413660954138545, |
|
"eval_loss": 1.2292009592056274, |
|
"eval_runtime": 36.8608, |
|
"eval_samples_per_second": 48.019, |
|
"eval_steps_per_second": 0.163, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.3098, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7530054891006436, |
|
"eval_loss": 1.1540861129760742, |
|
"eval_runtime": 36.7518, |
|
"eval_samples_per_second": 48.161, |
|
"eval_steps_per_second": 0.163, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.2827, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.7532555843965927, |
|
"eval_loss": 1.1605820655822754, |
|
"eval_runtime": 37.6556, |
|
"eval_samples_per_second": 47.005, |
|
"eval_steps_per_second": 0.159, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.2693, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7515547286193142, |
|
"eval_loss": 1.167082667350769, |
|
"eval_runtime": 36.7311, |
|
"eval_samples_per_second": 48.188, |
|
"eval_steps_per_second": 0.163, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.2521, |
|
"step": 1378 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.7581785009494731, |
|
"eval_loss": 1.134334921836853, |
|
"eval_runtime": 37.5802, |
|
"eval_samples_per_second": 47.099, |
|
"eval_steps_per_second": 0.16, |
|
"step": 1378 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.2421, |
|
"step": 1484 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.7592175982523275, |
|
"eval_loss": 1.1171754598617554, |
|
"eval_runtime": 36.9701, |
|
"eval_samples_per_second": 47.877, |
|
"eval_steps_per_second": 0.162, |
|
"step": 1484 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.2308, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.7617716097984533, |
|
"eval_loss": 1.1091045141220093, |
|
"eval_runtime": 37.8672, |
|
"eval_samples_per_second": 46.742, |
|
"eval_steps_per_second": 0.158, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.2132, |
|
"step": 1696 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.7612774124438361, |
|
"eval_loss": 1.1064728498458862, |
|
"eval_runtime": 36.7747, |
|
"eval_samples_per_second": 48.131, |
|
"eval_steps_per_second": 0.163, |
|
"step": 1696 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.2055, |
|
"step": 1802 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.761372646367941, |
|
"eval_loss": 1.1087865829467773, |
|
"eval_runtime": 36.8483, |
|
"eval_samples_per_second": 48.035, |
|
"eval_steps_per_second": 0.163, |
|
"step": 1802 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1931, |
|
"step": 1908 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.7599678162601031, |
|
"eval_loss": 1.1089411973953247, |
|
"eval_runtime": 35.9217, |
|
"eval_samples_per_second": 49.274, |
|
"eval_steps_per_second": 0.167, |
|
"step": 1908 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1815, |
|
"step": 2014 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.7659135481142874, |
|
"eval_loss": 1.0751391649246216, |
|
"eval_runtime": 36.8871, |
|
"eval_samples_per_second": 47.984, |
|
"eval_steps_per_second": 0.163, |
|
"step": 2014 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1728, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.768600214617861, |
|
"eval_loss": 1.069868803024292, |
|
"eval_runtime": 36.7387, |
|
"eval_samples_per_second": 48.178, |
|
"eval_steps_per_second": 0.163, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.164, |
|
"step": 2226 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.7675022113058035, |
|
"eval_loss": 1.065330147743225, |
|
"eval_runtime": 36.8047, |
|
"eval_samples_per_second": 48.092, |
|
"eval_steps_per_second": 0.163, |
|
"step": 2226 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1524, |
|
"step": 2332 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.7698299863752147, |
|
"eval_loss": 1.0548479557037354, |
|
"eval_runtime": 36.7052, |
|
"eval_samples_per_second": 48.222, |
|
"eval_steps_per_second": 0.163, |
|
"step": 2332 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1425, |
|
"step": 2438 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.769697828632807, |
|
"eval_loss": 1.047703504562378, |
|
"eval_runtime": 36.8466, |
|
"eval_samples_per_second": 48.037, |
|
"eval_steps_per_second": 0.163, |
|
"step": 2438 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.143, |
|
"step": 2544 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.7772214047626197, |
|
"eval_loss": 1.0133404731750488, |
|
"eval_runtime": 36.8371, |
|
"eval_samples_per_second": 48.049, |
|
"eval_steps_per_second": 0.163, |
|
"step": 2544 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1308, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.7741821838279008, |
|
"eval_loss": 1.0260401964187622, |
|
"eval_runtime": 36.8437, |
|
"eval_samples_per_second": 48.041, |
|
"eval_steps_per_second": 0.163, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1271, |
|
"step": 2756 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.773661286574258, |
|
"eval_loss": 1.0230038166046143, |
|
"eval_runtime": 36.8925, |
|
"eval_samples_per_second": 47.977, |
|
"eval_steps_per_second": 0.163, |
|
"step": 2756 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1202, |
|
"step": 2862 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.7754215166511026, |
|
"eval_loss": 1.0241199731826782, |
|
"eval_runtime": 36.7992, |
|
"eval_samples_per_second": 48.099, |
|
"eval_steps_per_second": 0.163, |
|
"step": 2862 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1168, |
|
"step": 2968 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.7775761513243871, |
|
"eval_loss": 1.0062930583953857, |
|
"eval_runtime": 38.045, |
|
"eval_samples_per_second": 46.524, |
|
"eval_steps_per_second": 0.158, |
|
"step": 2968 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1019, |
|
"step": 3074 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.7796242613030875, |
|
"eval_loss": 0.9990780353546143, |
|
"eval_runtime": 37.599, |
|
"eval_samples_per_second": 47.076, |
|
"eval_steps_per_second": 0.16, |
|
"step": 3074 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1071, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.7805559249900252, |
|
"eval_loss": 0.991283655166626, |
|
"eval_runtime": 36.7892, |
|
"eval_samples_per_second": 48.112, |
|
"eval_steps_per_second": 0.163, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.0963, |
|
"step": 3286 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.7864944315242504, |
|
"eval_loss": 0.9553370475769043, |
|
"eval_runtime": 36.7173, |
|
"eval_samples_per_second": 48.206, |
|
"eval_steps_per_second": 0.163, |
|
"step": 3286 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.089, |
|
"step": 3392 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.783273055389144, |
|
"eval_loss": 0.9850459098815918, |
|
"eval_runtime": 36.7845, |
|
"eval_samples_per_second": 48.118, |
|
"eval_steps_per_second": 0.163, |
|
"step": 3392 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.0807, |
|
"step": 3498 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.7774915196015286, |
|
"eval_loss": 0.9987505674362183, |
|
"eval_runtime": 36.8264, |
|
"eval_samples_per_second": 48.063, |
|
"eval_steps_per_second": 0.163, |
|
"step": 3498 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.0735, |
|
"step": 3604 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.7789523242042387, |
|
"eval_loss": 0.9969209432601929, |
|
"eval_runtime": 37.6346, |
|
"eval_samples_per_second": 47.031, |
|
"eval_steps_per_second": 0.159, |
|
"step": 3604 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.0766, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.7880373776195184, |
|
"eval_loss": 0.9506202936172485, |
|
"eval_runtime": 36.9744, |
|
"eval_samples_per_second": 47.871, |
|
"eval_steps_per_second": 0.162, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.0698, |
|
"step": 3816 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.7880023166711217, |
|
"eval_loss": 0.9565942883491516, |
|
"eval_runtime": 38.7958, |
|
"eval_samples_per_second": 45.623, |
|
"eval_steps_per_second": 0.155, |
|
"step": 3816 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.0608, |
|
"step": 3922 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.7855791137596568, |
|
"eval_loss": 0.9620457291603088, |
|
"eval_runtime": 37.7795, |
|
"eval_samples_per_second": 46.851, |
|
"eval_steps_per_second": 0.159, |
|
"step": 3922 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.0543, |
|
"step": 4028 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.7822399039183305, |
|
"eval_loss": 0.9812787175178528, |
|
"eval_runtime": 36.8483, |
|
"eval_samples_per_second": 48.035, |
|
"eval_steps_per_second": 0.163, |
|
"step": 4028 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.0521, |
|
"step": 4134 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.7884438270952546, |
|
"eval_loss": 0.9478756785392761, |
|
"eval_runtime": 36.7861, |
|
"eval_samples_per_second": 48.116, |
|
"eval_steps_per_second": 0.163, |
|
"step": 4134 |
|
} |
|
], |
|
"max_steps": 4240, |
|
"num_train_epochs": 40, |
|
"total_flos": 498219970723840.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|