|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9998971722365039, |
|
"global_step": 4862, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0, |
|
"loss": 11.981, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 3.9970117109141705e-05, |
|
"loss": 12.0789, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 9.331893267009234e-05, |
|
"loss": 10.7133, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00011407670594843083, |
|
"loss": 8.7339, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00012721122651399258, |
|
"loss": 8.301, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00013684136855727938, |
|
"loss": 8.1964, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_accuracy": 0.10310832025117739, |
|
"eval_loss": 8.203125, |
|
"eval_runtime": 6.5764, |
|
"eval_samples_per_second": 5.322, |
|
"eval_steps_per_second": 0.304, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00014444862339428802, |
|
"loss": 8.0553, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00015073705430110066, |
|
"loss": 7.9436, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00015609707636042195, |
|
"loss": 7.8368, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00016076788727202945, |
|
"loss": 7.7333, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00016490670495758757, |
|
"loss": 7.6139, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_accuracy": 0.12459794174079888, |
|
"eval_loss": 7.81640625, |
|
"eval_runtime": 6.6059, |
|
"eval_samples_per_second": 5.298, |
|
"eval_steps_per_second": 0.303, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001686224178807056, |
|
"loss": 7.4892, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000171993565594773, |
|
"loss": 7.3256, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00017507866443784335, |
|
"loss": 7.1827, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0001779224840062419, |
|
"loss": 6.9698, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00018056004207494319, |
|
"loss": 6.8162, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_accuracy": 0.1567556253270539, |
|
"eval_loss": 7.08203125, |
|
"eval_runtime": 6.5908, |
|
"eval_samples_per_second": 5.31, |
|
"eval_steps_per_second": 0.303, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00018301924610008189, |
|
"loss": 6.6293, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00018532269677939782, |
|
"loss": 6.4114, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00018748895370481112, |
|
"loss": 6.2911, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00018953344483335556, |
|
"loss": 6.1047, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019146913367833817, |
|
"loss": 5.9957, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_accuracy": 0.19811616954474098, |
|
"eval_loss": 6.4296875, |
|
"eval_runtime": 6.5897, |
|
"eval_samples_per_second": 5.311, |
|
"eval_steps_per_second": 0.304, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019330701776944063, |
|
"loss": 5.8281, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019505650713185044, |
|
"loss": 5.6927, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019672571585424665, |
|
"loss": 5.5564, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019832168964685297, |
|
"loss": 5.3813, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0001998505855457085, |
|
"loss": 5.2496, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_accuracy": 0.24055817198674342, |
|
"eval_loss": 5.8203125, |
|
"eval_runtime": 6.6013, |
|
"eval_samples_per_second": 5.302, |
|
"eval_steps_per_second": 0.303, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0001996535296665223, |
|
"loss": 5.13, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0001992204417496752, |
|
"loss": 5.0354, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019878735383282807, |
|
"loss": 4.9021, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019835426591598097, |
|
"loss": 4.8181, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019792117799913384, |
|
"loss": 4.6993, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_accuracy": 0.2641758241758242, |
|
"eval_loss": 5.41796875, |
|
"eval_runtime": 6.6009, |
|
"eval_samples_per_second": 5.302, |
|
"eval_steps_per_second": 0.303, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0001974880900822867, |
|
"loss": 4.6761, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019705500216543958, |
|
"loss": 4.5908, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019662191424859245, |
|
"loss": 4.5301, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019618882633174535, |
|
"loss": 4.4729, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019575573841489822, |
|
"loss": 4.3928, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_accuracy": 0.27927437641723357, |
|
"eval_loss": 5.14453125, |
|
"eval_runtime": 6.6101, |
|
"eval_samples_per_second": 5.295, |
|
"eval_steps_per_second": 0.303, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019532265049805112, |
|
"loss": 4.4083, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.000194889562581204, |
|
"loss": 4.336, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0001944564746643569, |
|
"loss": 4.2714, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019402338674750976, |
|
"loss": 4.2124, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019359029883066263, |
|
"loss": 4.2395, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_accuracy": 0.2894470608756323, |
|
"eval_loss": 4.96875, |
|
"eval_runtime": 6.5945, |
|
"eval_samples_per_second": 5.307, |
|
"eval_steps_per_second": 0.303, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0001931572109138155, |
|
"loss": 4.1867, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0001927241229969684, |
|
"loss": 4.1687, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019229103508012127, |
|
"loss": 4.1027, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019185794716327414, |
|
"loss": 4.1233, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019142485924642704, |
|
"loss": 4.0781, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_accuracy": 0.29638932496075354, |
|
"eval_loss": 4.8515625, |
|
"eval_runtime": 6.5996, |
|
"eval_samples_per_second": 5.303, |
|
"eval_steps_per_second": 0.303, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0001909917713295799, |
|
"loss": 4.0855, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0001905586834127328, |
|
"loss": 4.0859, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00019012559549588568, |
|
"loss": 4.0124, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018969250757903855, |
|
"loss": 4.0151, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018925941966219142, |
|
"loss": 4.0409, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.30181057038199893, |
|
"eval_loss": 4.76953125, |
|
"eval_runtime": 6.6117, |
|
"eval_samples_per_second": 5.294, |
|
"eval_steps_per_second": 0.302, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018882633174534431, |
|
"loss": 3.9912, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018839324382849718, |
|
"loss": 3.9383, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018796015591165008, |
|
"loss": 3.9764, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018752706799480295, |
|
"loss": 3.9809, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018709398007795585, |
|
"loss": 3.9178, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_accuracy": 0.30575963718820864, |
|
"eval_loss": 4.703125, |
|
"eval_runtime": 6.6265, |
|
"eval_samples_per_second": 5.282, |
|
"eval_steps_per_second": 0.302, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018666089216110872, |
|
"loss": 3.9073, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0001862278042442616, |
|
"loss": 3.9459, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018579471632741446, |
|
"loss": 3.9535, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018536162841056733, |
|
"loss": 3.8982, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018492854049372023, |
|
"loss": 3.834, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_accuracy": 0.3082574568288854, |
|
"eval_loss": 4.65625, |
|
"eval_runtime": 6.6135, |
|
"eval_samples_per_second": 5.292, |
|
"eval_steps_per_second": 0.302, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0001844954525768731, |
|
"loss": 3.8759, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.000184062364660026, |
|
"loss": 3.8835, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018362927674317887, |
|
"loss": 3.9003, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018319618882633177, |
|
"loss": 3.8538, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018276310090948464, |
|
"loss": 3.8316, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_accuracy": 0.30981336124193265, |
|
"eval_loss": 4.625, |
|
"eval_runtime": 6.6039, |
|
"eval_samples_per_second": 5.3, |
|
"eval_steps_per_second": 0.303, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018233001299263754, |
|
"loss": 3.9085, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018189692507579038, |
|
"loss": 3.7825, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.14, |