|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.217287974468663, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.666666666666667e-06, |
|
"loss": 2.5689, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 3.410009602540877e-06, |
|
"loss": 2.2872, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 3.899727994397217e-06, |
|
"loss": 2.2045, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.242838952070665e-06, |
|
"loss": 2.1665, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.507189546742703e-06, |
|
"loss": 2.0691, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.722272031045718e-06, |
|
"loss": 2.0554, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9035975752993475e-06, |
|
"loss": 2.0278, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.060338910107844e-06, |
|
"loss": 2.0495, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.1983733510531e-06, |
|
"loss": 2.0476, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.3216938522601335e-06, |
|
"loss": 2.0547, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.433137327841662e-06, |
|
"loss": 1.992, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.534791897046749e-06, |
|
"loss": 2.0016, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.6282392274646635e-06, |
|
"loss": 2.0251, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 5.71470613401092e-06, |
|
"loss": 1.9642, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 5.795163382432731e-06, |
|
"loss": 1.9489, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 5.870392331521204e-06, |
|
"loss": 1.9492, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 5.941031212303129e-06, |
|
"loss": 1.9794, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 6.007608082613048e-06, |
|
"loss": 1.9443, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 6.070564811128651e-06, |
|
"loss": 1.9561, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 6.130274870425884e-06, |
|
"loss": 1.9445, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 6.187056762962811e-06, |
|
"loss": 1.9922, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 6.241184306240316e-06, |
|
"loss": 1.9092, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 6.292894619573035e-06, |
|
"loss": 1.9877, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 6.3423944025283676e-06, |
|
"loss": 1.9013, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 6.385204482203244e-06, |
|
"loss": 1.9351, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 6.430985882568613e-06, |
|
"loss": 1.9364, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 6.475026117410103e-06, |
|
"loss": 1.9298, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 6.51745278911487e-06, |
|
"loss": 1.9262, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 6.55837996735795e-06, |
|
"loss": 1.9245, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 6.597910037536681e-06, |
|
"loss": 1.9264, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 6.6361352440011835e-06, |
|
"loss": 1.9318, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 6.673138986625154e-06, |
|
"loss": 1.8992, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.708996916564478e-06, |
|
"loss": 1.8958, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.74377786740708e-06, |
|
"loss": 1.9317, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.777544650524216e-06, |
|
"loss": 1.8891, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.810354737716999e-06, |
|
"loss": 1.8944, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.842260849796047e-06, |
|
"loss": 1.8904, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.873311466232601e-06, |
|
"loss": 1.9326, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.903551268251219e-06, |
|
"loss": 1.8795, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.933021525529835e-06, |
|
"loss": 1.8729, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.961760434906346e-06, |
|
"loss": 1.861, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 6.9898034180667605e-06, |
|
"loss": 1.9003, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 7.017183384035125e-06, |
|
"loss": 1.9061, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 7.043930961344266e-06, |
|
"loss": 1.9146, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 7.07007470399505e-06, |
|
"loss": 1.8741, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 7.095641274676984e-06, |
|
"loss": 1.8402, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 7.120655608197665e-06, |
|
"loss": 1.9109, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 7.145141057632318e-06, |
|
"loss": 1.8829, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 7.169119525340562e-06, |
|
"loss": 1.8557, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 7.19261158069258e-06, |
|
"loss": 1.8577, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_webgpt_accuracy": 0.4847224518236187, |
|
"eval_webgpt_loss": 2.392578125, |
|
"eval_webgpt_runtime": 337.627, |
|
"eval_webgpt_samples_per_second": 11.599, |
|
"eval_webgpt_steps_per_second": 0.726, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_prompt_dialogue_accuracy": 0.5612746203747981, |
|
"eval_prompt_dialogue_loss": 1.828125, |
|
"eval_prompt_dialogue_runtime": 811.1162, |
|
"eval_prompt_dialogue_samples_per_second": 12.71, |
|
"eval_prompt_dialogue_steps_per_second": 0.795, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_squad_v2_accuracy": 0.9115060111379335, |
|
"eval_squad_v2_loss": NaN, |
|
"eval_squad_v2_runtime": 941.3453, |
|
"eval_squad_v2_samples_per_second": 12.613, |
|
"eval_squad_v2_steps_per_second": 0.789, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_adversarial_qa_accuracy": 0.8083160800552105, |
|
"eval_adversarial_qa_loss": 0.86083984375, |
|
"eval_adversarial_qa_runtime": 238.8648, |
|
"eval_adversarial_qa_samples_per_second": 12.559, |
|
"eval_adversarial_qa_steps_per_second": 0.787, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_trivia_qa_nocontext_accuracy": 0.45998188422713815, |
|
"eval_trivia_qa_nocontext_loss": 3.12890625, |
|
"eval_trivia_qa_nocontext_runtime": 1249.2124, |
|
"eval_trivia_qa_nocontext_samples_per_second": 14.364, |
|
"eval_trivia_qa_nocontext_steps_per_second": 0.898, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_m2m_translation_accuracy": 0.5733300105250808, |
|
"eval_m2m_translation_loss": 1.8857421875, |
|
"eval_m2m_translation_runtime": 3725.0595, |
|
"eval_m2m_translation_samples_per_second": 14.208, |
|
"eval_m2m_translation_steps_per_second": 0.888, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_xsum_accuracy": 0.5671291686677834, |
|
"eval_xsum_loss": 1.9658203125, |
|
"eval_xsum_runtime": 1011.0399, |
|
"eval_xsum_samples_per_second": 11.208, |
|
"eval_xsum_steps_per_second": 0.701, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_cnn_dailymail_accuracy": 0.6565405592058271, |
|
"eval_cnn_dailymail_loss": 1.5810546875, |
|
"eval_cnn_dailymail_runtime": 1213.6108, |
|
"eval_cnn_dailymail_samples_per_second": 11.015, |
|
"eval_cnn_dailymail_steps_per_second": 0.689, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_multi_news_accuracy": 0.5175576159426271, |
|
"eval_multi_news_loss": 2.33203125, |
|
"eval_multi_news_runtime": 512.3687, |
|
"eval_multi_news_samples_per_second": 10.973, |
|
"eval_multi_news_steps_per_second": 0.687, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_scitldr_accuracy": 0.46241087965899574, |
|
"eval_scitldr_loss": 2.5234375, |
|
"eval_scitldr_runtime": 57.2333, |
|
"eval_scitldr_samples_per_second": 10.815, |
|
"eval_scitldr_steps_per_second": 0.681, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_soda_accuracy": 0.7121364530942489, |
|
"eval_soda_loss": 1.21484375, |
|
"eval_soda_runtime": 82.3256, |
|
"eval_soda_samples_per_second": 12.159, |
|
"eval_soda_steps_per_second": 0.765, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_joke_accuracy": 0.454450170320552, |
|
"eval_joke_loss": 2.62109375, |
|
"eval_joke_runtime": 6.9296, |
|
"eval_joke_samples_per_second": 10.967, |
|
"eval_joke_steps_per_second": 0.722, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_gsm8k_accuracy": 0.8305793220560381, |
|
"eval_gsm8k_loss": 0.61328125, |
|
"eval_gsm8k_runtime": 113.5097, |
|
"eval_gsm8k_samples_per_second": 11.62, |
|
"eval_gsm8k_steps_per_second": 0.731, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_samsum_accuracy": 0.6179719084733047, |
|
"eval_samsum_loss": 1.5537109375, |
|
"eval_samsum_runtime": 73.3198, |
|
"eval_samsum_samples_per_second": 11.157, |
|
"eval_samsum_steps_per_second": 0.709, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 7.215636566090451e-06, |
|
"loss": 1.8655, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 7.238212692653954e-06, |
|
"loss": 1.9284, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 7.260357126756832e-06, |
|
"loss": 1.8522, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 7.282086068443704e-06, |
|
"loss": 1.8869, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 7.303414822624985e-06, |
|
"loss": 1.905, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 7.324357863833543e-06, |
|
"loss": 1.8483, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 7.344928895229384e-06, |
|
"loss": 1.8628, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 7.365140902454744e-06, |
|
"loss": 1.8982, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 7.385006202869703e-06, |
|
"loss": 1.834, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 7.404536490635762e-06, |
|
"loss": 1.8488, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 7.423742878060626e-06, |
|
"loss": 1.8585, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 7.442635933570175e-06, |
|
"loss": 1.8292, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 7.461225716632522e-06, |
|
"loss": 1.8399, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 7.479521809923099e-06, |
|
"loss": 1.8775, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 7.497533348988249e-06, |
|
"loss": 1.8593, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 7.515269049637215e-06, |
|
"loss": 1.8557, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 7.532737233268121e-06, |
|
"loss": 1.8427, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 7.5499458503121685e-06, |
|
"loss": 1.859, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 7.566902501961364e-06, |
|
"loss": 1.8111, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 7.5836144603284295e-06, |
|
"loss": 1.8246, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 7.600088687172717e-06, |
|
"loss": 1.8613, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 7.616331851312801e-06, |
|
"loss": 1.8478, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 7.632350344834765e-06, |
|
"loss": 1.8839, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 7.648150298194777e-06, |
|
"loss": 1.8625, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 7.663737594305231e-06, |
|
"loss": 1.8291, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 7.679117881685476e-06, |
|
"loss": 1.836, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 7.6942965867507e-06, |
|
"loss": 1.8102, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 7.709278925305836e-06, |
|
"loss": 1.8353, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 7.72406991330548e-06, |
|
"loss": 1.8713, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 7.738674376935279e-06, |
|
"loss": 1.8314, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 7.753096962065562e-06, |
|
"loss": 1.8553, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 7.767342143123527e-06, |
|
"loss": 1.8318, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 7.781414231426347e-06, |
|
"loss": 1.8401, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 7.79531738301407e-06, |
|
"loss": 1.8108, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 7.809055606017904e-06, |
|
"loss": 1.8563, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 7.822632767596549e-06, |
|
"loss": 1.8408, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 7.836052600470646e-06, |
|
"loss": 1.8096, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 7.849318709082941e-06, |
|
"loss": 1.8264, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 7.862434575409602e-06, |
|
"loss": 1.8902, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 7.875403564446145e-06, |
|
"loss": 1.7953, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 7.888228929389561e-06, |
|
"loss": 1.837, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 7.900913816536647e-06, |
|
"loss": 1.7704, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 7.913461269916965e-06, |
|
"loss": 1.8251, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 7.925874235677506e-06, |
|
"loss": 1.8294, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 7.938155566234842e-06, |
|
"loss": 1.8306, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 7.950308024209451e-06, |
|
"loss": 1.7889, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 7.962334286155715e-06, |
|
"loss": 1.7922, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 7.974236946100272e-06, |
|
"loss": 1.7679, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 7.986018518900343e-06, |
|
"loss": 1.8068, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 7.997681443432989e-06, |
|
"loss": 1.8467, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_webgpt_accuracy": 0.4853852488269729, |
|
"eval_webgpt_loss": 2.388671875, |
|
"eval_webgpt_runtime": 337.66, |
|
"eval_webgpt_samples_per_second": 11.597, |
|
"eval_webgpt_steps_per_second": 0.726, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_prompt_dialogue_accuracy": 0.5656945584798752, |
|
"eval_prompt_dialogue_loss": 1.7978515625, |
|
"eval_prompt_dialogue_runtime": 810.7166, |
|
"eval_prompt_dialogue_samples_per_second": 12.716, |
|
"eval_prompt_dialogue_steps_per_second": 0.796, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_squad_v2_accuracy": 0.929180907765482, |
|
"eval_squad_v2_loss": NaN, |
|
"eval_squad_v2_runtime": 942.6897, |
|
"eval_squad_v2_samples_per_second": 12.595, |
|
"eval_squad_v2_steps_per_second": 0.788, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_adversarial_qa_accuracy": 0.8106740280653324, |
|
"eval_adversarial_qa_loss": 0.8291015625, |
|
"eval_adversarial_qa_runtime": 239.9039, |
|
"eval_adversarial_qa_samples_per_second": 12.505, |
|
"eval_adversarial_qa_steps_per_second": 0.784, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_trivia_qa_nocontext_accuracy": 0.46837345154352583, |
|
"eval_trivia_qa_nocontext_loss": 3.052734375, |
|
"eval_trivia_qa_nocontext_runtime": 1248.9886, |
|
"eval_trivia_qa_nocontext_samples_per_second": 14.367, |
|
"eval_trivia_qa_nocontext_steps_per_second": 0.898, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_m2m_translation_accuracy": 0.6128791362753888, |
|
"eval_m2m_translation_loss": 1.6904296875, |
|
"eval_m2m_translation_runtime": 3724.3046, |
|
"eval_m2m_translation_samples_per_second": 14.21, |
|
"eval_m2m_translation_steps_per_second": 0.888, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_xsum_accuracy": 0.576201200101912, |
|
"eval_xsum_loss": 1.90625, |
|
"eval_xsum_runtime": 1013.837, |
|
"eval_xsum_samples_per_second": 11.177, |
|
"eval_xsum_steps_per_second": 0.699, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_cnn_dailymail_accuracy": 0.6590085257947359, |
|
"eval_cnn_dailymail_loss": 1.5537109375, |
|
"eval_cnn_dailymail_runtime": 1212.8269, |
|
"eval_cnn_dailymail_samples_per_second": 11.022, |
|
"eval_cnn_dailymail_steps_per_second": 0.689, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_multi_news_accuracy": 0.5213214574389622, |
|
"eval_multi_news_loss": 2.302734375, |
|
"eval_multi_news_runtime": 514.4889, |
|
"eval_multi_news_samples_per_second": 10.927, |
|
"eval_multi_news_steps_per_second": 0.684, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_scitldr_accuracy": 0.4667123357182727, |
|
"eval_scitldr_loss": 2.50390625, |
|
"eval_scitldr_runtime": 55.8523, |
|
"eval_scitldr_samples_per_second": 11.083, |
|
"eval_scitldr_steps_per_second": 0.698, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_soda_accuracy": 0.7221954952984911, |
|
"eval_soda_loss": 1.1513671875, |
|
"eval_soda_runtime": 82.1743, |
|
"eval_soda_samples_per_second": 12.181, |
|
"eval_soda_steps_per_second": 0.767, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_joke_accuracy": 0.45549829679447984, |
|
"eval_joke_loss": 2.5625, |
|
"eval_joke_runtime": 7.1872, |
|
"eval_joke_samples_per_second": 10.574, |
|
"eval_joke_steps_per_second": 0.696, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_gsm8k_accuracy": 0.836947292796029, |
|
"eval_gsm8k_loss": 0.591796875, |
|
"eval_gsm8k_runtime": 112.7859, |
|
"eval_gsm8k_samples_per_second": 11.695, |
|
"eval_gsm8k_steps_per_second": 0.736, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_samsum_accuracy": 0.6263182026593306, |
|
"eval_samsum_loss": 1.5126953125, |
|
"eval_samsum_runtime": 73.6542, |
|
"eval_samsum_samples_per_second": 11.106, |
|
"eval_samsum_steps_per_second": 0.706, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 7.997895686156621e-06, |
|
"loss": 1.8114, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 7.994889523523223e-06, |
|
"loss": 1.857, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 7.991883360889824e-06, |
|
"loss": 1.8195, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 7.988877198256426e-06, |
|
"loss": 1.8101, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 7.985871035623028e-06, |
|
"loss": 1.8169, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 7.982864872989627e-06, |
|
"loss": 1.8414, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 7.979858710356229e-06, |
|
"loss": 1.825, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 7.97685254772283e-06, |
|
"loss": 1.7882, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 7.973846385089432e-06, |
|
"loss": 1.7857, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 7.970840222456035e-06, |
|
"loss": 1.8229, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 7.967834059822637e-06, |
|
"loss": 1.8152, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 7.964827897189238e-06, |
|
"loss": 1.7865, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 7.96182173455584e-06, |
|
"loss": 1.8038, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 7.958815571922442e-06, |
|
"loss": 1.796, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 7.955809409289041e-06, |
|
"loss": 1.7737, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 7.952803246655643e-06, |
|
"loss": 1.7958, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 7.949797084022244e-06, |
|
"loss": 1.7981, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 7.946790921388846e-06, |
|
"loss": 1.8061, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 7.943784758755448e-06, |
|
"loss": 1.8395, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 7.940778596122049e-06, |
|
"loss": 1.7803, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 7.937772433488652e-06, |
|
"loss": 1.7906, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 7.934766270855254e-06, |
|
"loss": 1.8076, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 7.931760108221855e-06, |
|
"loss": 1.7971, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 7.928753945588455e-06, |
|
"loss": 1.8292, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 7.925747782955057e-06, |
|
"loss": 1.7876, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 7.922741620321658e-06, |
|
"loss": 1.7499, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 7.91973545768826e-06, |
|
"loss": 1.8209, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 7.916729295054861e-06, |
|
"loss": 1.7971, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 7.913723132421463e-06, |
|
"loss": 1.8168, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 7.910716969788065e-06, |
|
"loss": 1.7771, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 7.907710807154668e-06, |
|
"loss": 1.7611, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 7.90470464452127e-06, |
|
"loss": 1.7673, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 7.90169848188787e-06, |
|
"loss": 1.8694, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 7.89869231925447e-06, |
|
"loss": 1.7543, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 7.895686156621072e-06, |
|
"loss": 1.759, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 7.892679993987674e-06, |
|
"loss": 1.7929, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 7.889673831354275e-06, |
|
"loss": 1.8223, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 7.886667668720877e-06, |
|
"loss": 1.7829, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 7.883661506087479e-06, |
|
"loss": 1.8088, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 7.88065534345408e-06, |
|
"loss": 1.8012, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 7.877649180820682e-06, |
|
"loss": 1.7337, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 7.874643018187283e-06, |
|
"loss": 1.7848, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 7.871636855553885e-06, |
|
"loss": 1.7741, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 7.868630692920486e-06, |
|
"loss": 1.8111, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 7.865624530287088e-06, |
|
"loss": 1.7663, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 7.86261836765369e-06, |
|
"loss": 1.7635, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 7.859612205020291e-06, |
|
"loss": 1.7613, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 7.856606042386892e-06, |
|
"loss": 1.7537, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 7.853599879753494e-06, |
|
"loss": 1.8094, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 7.850593717120096e-06, |
|
"loss": 1.7487, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_webgpt_accuracy": 0.486666038374247, |
|
"eval_webgpt_loss": 2.384765625, |
|
"eval_webgpt_runtime": 336.8175, |
|
"eval_webgpt_samples_per_second": 11.626, |
|
"eval_webgpt_steps_per_second": 0.727, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_prompt_dialogue_accuracy": 0.5696171266680501, |
|
"eval_prompt_dialogue_loss": 1.7705078125, |
|
"eval_prompt_dialogue_runtime": 809.6613, |
|
"eval_prompt_dialogue_samples_per_second": 12.732, |
|
"eval_prompt_dialogue_steps_per_second": 0.797, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_squad_v2_accuracy": 0.9273763713281866, |
|
"eval_squad_v2_loss": NaN, |
|
"eval_squad_v2_runtime": 941.2422, |
|
"eval_squad_v2_samples_per_second": 12.614, |
|
"eval_squad_v2_steps_per_second": 0.789, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_adversarial_qa_accuracy": 0.8228088336783989, |
|
"eval_adversarial_qa_loss": 0.76416015625, |
|
"eval_adversarial_qa_runtime": 238.4374, |
|
"eval_adversarial_qa_samples_per_second": 12.582, |
|
"eval_adversarial_qa_steps_per_second": 0.788, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_trivia_qa_nocontext_accuracy": 0.47413712205101255, |
|
"eval_trivia_qa_nocontext_loss": 2.9921875, |
|
"eval_trivia_qa_nocontext_runtime": 1248.2736, |
|
"eval_trivia_qa_nocontext_samples_per_second": 14.375, |
|
"eval_trivia_qa_nocontext_steps_per_second": 0.899, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_m2m_translation_accuracy": 0.6384202273383193, |
|
"eval_m2m_translation_loss": 1.5703125, |
|
"eval_m2m_translation_runtime": 3725.8719, |
|
"eval_m2m_translation_samples_per_second": 14.204, |
|
"eval_m2m_translation_steps_per_second": 0.888, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_xsum_accuracy": 0.5815857676257413, |
|
"eval_xsum_loss": 1.87109375, |
|
"eval_xsum_runtime": 1009.1324, |
|
"eval_xsum_samples_per_second": 11.229, |
|
"eval_xsum_steps_per_second": 0.703, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_cnn_dailymail_accuracy": 0.6627513117154793, |
|
"eval_cnn_dailymail_loss": 1.537109375, |
|
"eval_cnn_dailymail_runtime": 1211.2046, |
|
"eval_cnn_dailymail_samples_per_second": 11.037, |
|
"eval_cnn_dailymail_steps_per_second": 0.69, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_multi_news_accuracy": 0.5242124214780086, |
|
"eval_multi_news_loss": 2.287109375, |
|
"eval_multi_news_runtime": 513.1245, |
|
"eval_multi_news_samples_per_second": 10.956, |
|
"eval_multi_news_steps_per_second": 0.686, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_scitldr_accuracy": 0.46804191723122984, |
|
"eval_scitldr_loss": 2.4921875, |
|
"eval_scitldr_runtime": 56.2815, |
|
"eval_scitldr_samples_per_second": 10.998, |
|
"eval_scitldr_steps_per_second": 0.693, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_soda_accuracy": 0.7266564618412421, |
|
"eval_soda_loss": 1.12890625, |
|
"eval_soda_runtime": 82.0556, |
|
"eval_soda_samples_per_second": 12.199, |
|
"eval_soda_steps_per_second": 0.768, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_joke_accuracy": 0.45995283430867323, |
|
"eval_joke_loss": 2.5, |
|
"eval_joke_runtime": 7.0053, |
|
"eval_joke_samples_per_second": 10.849, |
|
"eval_joke_steps_per_second": 0.714, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_gsm8k_accuracy": 0.8421233100385344, |
|
"eval_gsm8k_loss": 0.57177734375, |
|
"eval_gsm8k_runtime": 113.8436, |
|
"eval_gsm8k_samples_per_second": 11.586, |
|
"eval_gsm8k_steps_per_second": 0.729, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_samsum_accuracy": 0.6305018339446482, |
|
"eval_samsum_loss": 1.490234375, |
|
"eval_samsum_runtime": 72.6647, |
|
"eval_samsum_samples_per_second": 11.257, |
|
"eval_samsum_steps_per_second": 0.716, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 7.847587554486697e-06, |
|
"loss": 1.7693, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 7.844882008116638e-06, |
|
"loss": 1.7719, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 7.84187584548324e-06, |
|
"loss": 1.7777, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 7.838869682849841e-06, |
|
"loss": 1.7922, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 7.835863520216443e-06, |
|
"loss": 1.7688, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 7.832857357583046e-06, |
|
"loss": 1.7848, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 7.829851194949648e-06, |
|
"loss": 1.7795, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 7.826845032316247e-06, |
|
"loss": 1.7826, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 7.823838869682849e-06, |
|
"loss": 1.7511, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 7.82083270704945e-06, |
|
"loss": 1.7502, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 7.817826544416052e-06, |
|
"loss": 1.7384, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 7.814820381782654e-06, |
|
"loss": 1.773, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 7.811814219149255e-06, |
|
"loss": 1.785, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 7.808808056515857e-06, |
|
"loss": 1.7407, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 7.805801893882458e-06, |
|
"loss": 1.8115, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 7.80279573124906e-06, |
|
"loss": 1.8207, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 7.799789568615661e-06, |
|
"loss": 1.7618, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 7.796783405982263e-06, |
|
"loss": 1.7725, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 7.793777243348864e-06, |
|
"loss": 1.7618, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 7.790771080715466e-06, |
|
"loss": 1.7567, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 7.787764918082068e-06, |
|
"loss": 1.7704, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 7.784758755448669e-06, |
|
"loss": 1.7426, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 7.78175259281527e-06, |
|
"loss": 1.7373, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 7.778746430181872e-06, |
|
"loss": 1.7966, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 7.775740267548474e-06, |
|
"loss": 1.773, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 7.772734104915075e-06, |
|
"loss": 1.7664, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 7.769727942281677e-06, |
|
"loss": 1.7518, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 7.766721779648278e-06, |
|
"loss": 1.7743, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 7.76371561701488e-06, |
|
"loss": 1.7882, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 7.760709454381481e-06, |
|
"loss": 1.7534, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 7.757703291748083e-06, |
|
"loss": 1.7761, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 7.754697129114685e-06, |
|
"loss": 1.7951, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 7.751690966481286e-06, |
|
"loss": 1.7287, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 7.748684803847888e-06, |
|
"loss": 1.7674, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 7.74567864121449e-06, |
|
"loss": 1.7664, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 7.74267247858109e-06, |
|
"loss": 1.7569, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 7.739666315947692e-06, |
|
"loss": 1.8026, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 7.736960769577633e-06, |
|
"loss": 1.7262, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 7.733954606944235e-06, |
|
"loss": 1.7266, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 7.730948444310836e-06, |
|
"loss": 1.7476, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 7.727942281677438e-06, |
|
"loss": 1.7544, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 7.72493611904404e-06, |
|
"loss": 1.7859, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 7.721929956410641e-06, |
|
"loss": 1.7386, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 7.718923793777243e-06, |
|
"loss": 1.7012, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 7.715917631143844e-06, |
|
"loss": 1.7736, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 7.712911468510446e-06, |
|
"loss": 1.7398, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 7.709905305877047e-06, |
|
"loss": 1.8026, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 7.706899143243649e-06, |
|
"loss": 1.798, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 7.70389298061025e-06, |
|
"loss": 1.802, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 7.700886817976852e-06, |
|
"loss": 1.7447, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_webgpt_accuracy": 0.48722841158921415, |
|
"eval_webgpt_loss": 2.37890625, |
|
"eval_webgpt_runtime": 335.1847, |
|
"eval_webgpt_samples_per_second": 11.683, |
|
"eval_webgpt_steps_per_second": 0.731, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_prompt_dialogue_accuracy": 0.5729272869993594, |
|
"eval_prompt_dialogue_loss": 1.7509765625, |
|
"eval_prompt_dialogue_runtime": 808.5089, |
|
"eval_prompt_dialogue_samples_per_second": 12.751, |
|
"eval_prompt_dialogue_steps_per_second": 0.798, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_squad_v2_accuracy": 0.9350127871125027, |
|
"eval_squad_v2_loss": NaN, |
|
"eval_squad_v2_runtime": 937.9399, |
|
"eval_squad_v2_samples_per_second": 12.659, |
|
"eval_squad_v2_steps_per_second": 0.792, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_adversarial_qa_accuracy": 0.8308028525419829, |
|
"eval_adversarial_qa_loss": 0.7265625, |
|
"eval_adversarial_qa_runtime": 238.5652, |
|
"eval_adversarial_qa_samples_per_second": 12.575, |
|
"eval_adversarial_qa_steps_per_second": 0.788, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_trivia_qa_nocontext_accuracy": 0.47783426824006153, |
|
"eval_trivia_qa_nocontext_loss": 2.9609375, |
|
"eval_trivia_qa_nocontext_runtime": 1249.121, |
|
"eval_trivia_qa_nocontext_samples_per_second": 14.365, |
|
"eval_trivia_qa_nocontext_steps_per_second": 0.898, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_m2m_translation_accuracy": 0.6576231472988909, |
|
"eval_m2m_translation_loss": 1.4755859375, |
|
"eval_m2m_translation_runtime": 3725.504, |
|
"eval_m2m_translation_samples_per_second": 14.206, |
|
"eval_m2m_translation_steps_per_second": 0.888, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_xsum_accuracy": 0.5862788967827711, |
|
"eval_xsum_loss": 1.8447265625, |
|
"eval_xsum_runtime": 1007.4241, |
|
"eval_xsum_samples_per_second": 11.248, |
|
"eval_xsum_steps_per_second": 0.704, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_cnn_dailymail_accuracy": 0.6635118400214548, |
|
"eval_cnn_dailymail_loss": 1.5224609375, |
|
"eval_cnn_dailymail_runtime": 1207.7216, |
|
"eval_cnn_dailymail_samples_per_second": 11.069, |
|
"eval_cnn_dailymail_steps_per_second": 0.692, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_multi_news_accuracy": 0.5254140019732162, |
|
"eval_multi_news_loss": 2.271484375, |
|
"eval_multi_news_runtime": 511.4499, |
|
"eval_multi_news_samples_per_second": 10.992, |
|
"eval_multi_news_steps_per_second": 0.688, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_scitldr_accuracy": 0.46584969807682547, |
|
"eval_scitldr_loss": 2.50390625, |
|
"eval_scitldr_runtime": 56.1361, |
|
"eval_scitldr_samples_per_second": 11.027, |
|
"eval_scitldr_steps_per_second": 0.695, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_soda_accuracy": 0.7280997157227204, |
|
"eval_soda_loss": 1.1171875, |
|
"eval_soda_runtime": 81.9632, |
|
"eval_soda_samples_per_second": 12.213, |
|
"eval_soda_steps_per_second": 0.769, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_joke_accuracy": 0.4692986287011966, |
|
"eval_joke_loss": 2.46484375, |
|
"eval_joke_runtime": 6.7476, |
|
"eval_joke_samples_per_second": 11.263, |
|
"eval_joke_steps_per_second": 0.741, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_gsm8k_accuracy": 0.8437234667885833, |
|
"eval_gsm8k_loss": 0.5634765625, |
|
"eval_gsm8k_runtime": 113.7256, |
|
"eval_gsm8k_samples_per_second": 11.598, |
|
"eval_gsm8k_steps_per_second": 0.73, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_samsum_accuracy": 0.6325025008336113, |
|
"eval_samsum_loss": 1.4638671875, |
|
"eval_samsum_runtime": 72.3462, |
|
"eval_samsum_samples_per_second": 11.307, |
|
"eval_samsum_steps_per_second": 0.719, |
|
"step": 2000 |
|
} |
|
], |
|
"max_steps": 27612, |
|
"num_train_epochs": 3, |
|
"total_flos": 428521886187520.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|