{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.217287974468663, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.666666666666667e-06, "loss": 2.5689, "step": 10 }, { "epoch": 0.0, "learning_rate": 3.410009602540877e-06, "loss": 2.2872, "step": 20 }, { "epoch": 0.0, "learning_rate": 3.899727994397217e-06, "loss": 2.2045, "step": 30 }, { "epoch": 0.0, "learning_rate": 4.242838952070665e-06, "loss": 2.1665, "step": 40 }, { "epoch": 0.01, "learning_rate": 4.507189546742703e-06, "loss": 2.0691, "step": 50 }, { "epoch": 0.01, "learning_rate": 4.722272031045718e-06, "loss": 2.0554, "step": 60 }, { "epoch": 0.01, "learning_rate": 4.9035975752993475e-06, "loss": 2.0278, "step": 70 }, { "epoch": 0.01, "learning_rate": 5.060338910107844e-06, "loss": 2.0495, "step": 80 }, { "epoch": 0.01, "learning_rate": 5.1983733510531e-06, "loss": 2.0476, "step": 90 }, { "epoch": 0.01, "learning_rate": 5.3216938522601335e-06, "loss": 2.0547, "step": 100 }, { "epoch": 0.01, "learning_rate": 5.433137327841662e-06, "loss": 1.992, "step": 110 }, { "epoch": 0.01, "learning_rate": 5.534791897046749e-06, "loss": 2.0016, "step": 120 }, { "epoch": 0.01, "learning_rate": 5.6282392274646635e-06, "loss": 2.0251, "step": 130 }, { "epoch": 0.02, "learning_rate": 5.71470613401092e-06, "loss": 1.9642, "step": 140 }, { "epoch": 0.02, "learning_rate": 5.795163382432731e-06, "loss": 1.9489, "step": 150 }, { "epoch": 0.02, "learning_rate": 5.870392331521204e-06, "loss": 1.9492, "step": 160 }, { "epoch": 0.02, "learning_rate": 5.941031212303129e-06, "loss": 1.9794, "step": 170 }, { "epoch": 0.02, "learning_rate": 6.007608082613048e-06, "loss": 1.9443, "step": 180 }, { "epoch": 0.02, "learning_rate": 6.070564811128651e-06, "loss": 1.9561, "step": 190 }, { "epoch": 0.02, "learning_rate": 6.130274870425884e-06, "loss": 1.9445, "step": 200 }, { "epoch": 0.02, "learning_rate": 6.187056762962811e-06, "loss": 1.9922, "step": 210 }, { "epoch": 0.02, "learning_rate": 6.241184306240316e-06, "loss": 1.9092, "step": 220 }, { "epoch": 0.02, "learning_rate": 6.292894619573035e-06, "loss": 1.9877, "step": 230 }, { "epoch": 0.03, "learning_rate": 6.3423944025283676e-06, "loss": 1.9013, "step": 240 }, { "epoch": 0.03, "learning_rate": 6.385204482203244e-06, "loss": 1.9351, "step": 250 }, { "epoch": 0.03, "learning_rate": 6.430985882568613e-06, "loss": 1.9364, "step": 260 }, { "epoch": 0.03, "learning_rate": 6.475026117410103e-06, "loss": 1.9298, "step": 270 }, { "epoch": 0.03, "learning_rate": 6.51745278911487e-06, "loss": 1.9262, "step": 280 }, { "epoch": 0.03, "learning_rate": 6.55837996735795e-06, "loss": 1.9245, "step": 290 }, { "epoch": 0.03, "learning_rate": 6.597910037536681e-06, "loss": 1.9264, "step": 300 }, { "epoch": 0.03, "learning_rate": 6.6361352440011835e-06, "loss": 1.9318, "step": 310 }, { "epoch": 0.03, "learning_rate": 6.673138986625154e-06, "loss": 1.8992, "step": 320 }, { "epoch": 0.04, "learning_rate": 6.708996916564478e-06, "loss": 1.8958, "step": 330 }, { "epoch": 0.04, "learning_rate": 6.74377786740708e-06, "loss": 1.9317, "step": 340 }, { "epoch": 0.04, "learning_rate": 6.777544650524216e-06, "loss": 1.8891, "step": 350 }, { "epoch": 0.04, "learning_rate": 6.810354737716999e-06, "loss": 1.8944, "step": 360 }, { "epoch": 0.04, "learning_rate": 6.842260849796047e-06, "loss": 1.8904, "step": 370 }, { "epoch": 0.04, "learning_rate": 6.873311466232601e-06, "loss": 1.9326, "step": 380 }, { "epoch": 0.04, "learning_rate": 6.903551268251219e-06, "loss": 1.8795, "step": 390 }, { "epoch": 0.04, "learning_rate": 6.933021525529835e-06, "loss": 1.8729, "step": 400 }, { "epoch": 0.04, "learning_rate": 6.961760434906346e-06, "loss": 1.861, "step": 410 }, { "epoch": 0.05, "learning_rate": 6.9898034180667605e-06, "loss": 1.9003, "step": 420 }, { "epoch": 0.05, "learning_rate": 7.017183384035125e-06, "loss": 1.9061, "step": 430 }, { "epoch": 0.05, "learning_rate": 7.043930961344266e-06, "loss": 1.9146, "step": 440 }, { "epoch": 0.05, "learning_rate": 7.07007470399505e-06, "loss": 1.8741, "step": 450 }, { "epoch": 0.05, "learning_rate": 7.095641274676984e-06, "loss": 1.8402, "step": 460 }, { "epoch": 0.05, "learning_rate": 7.120655608197665e-06, "loss": 1.9109, "step": 470 }, { "epoch": 0.05, "learning_rate": 7.145141057632318e-06, "loss": 1.8829, "step": 480 }, { "epoch": 0.05, "learning_rate": 7.169119525340562e-06, "loss": 1.8557, "step": 490 }, { "epoch": 0.05, "learning_rate": 7.19261158069258e-06, "loss": 1.8577, "step": 500 }, { "epoch": 0.05, "eval_webgpt_accuracy": 0.4847224518236187, "eval_webgpt_loss": 2.392578125, "eval_webgpt_runtime": 337.627, "eval_webgpt_samples_per_second": 11.599, "eval_webgpt_steps_per_second": 0.726, "step": 500 }, { "epoch": 0.05, "eval_prompt_dialogue_accuracy": 0.5612746203747981, "eval_prompt_dialogue_loss": 1.828125, "eval_prompt_dialogue_runtime": 811.1162, "eval_prompt_dialogue_samples_per_second": 12.71, "eval_prompt_dialogue_steps_per_second": 0.795, "step": 500 }, { "epoch": 0.05, "eval_squad_v2_accuracy": 0.9115060111379335, "eval_squad_v2_loss": NaN, "eval_squad_v2_runtime": 941.3453, "eval_squad_v2_samples_per_second": 12.613, "eval_squad_v2_steps_per_second": 0.789, "step": 500 }, { "epoch": 0.05, "eval_adversarial_qa_accuracy": 0.8083160800552105, "eval_adversarial_qa_loss": 0.86083984375, "eval_adversarial_qa_runtime": 238.8648, "eval_adversarial_qa_samples_per_second": 12.559, "eval_adversarial_qa_steps_per_second": 0.787, "step": 500 }, { "epoch": 0.05, "eval_trivia_qa_nocontext_accuracy": 0.45998188422713815, "eval_trivia_qa_nocontext_loss": 3.12890625, "eval_trivia_qa_nocontext_runtime": 1249.2124, "eval_trivia_qa_nocontext_samples_per_second": 14.364, "eval_trivia_qa_nocontext_steps_per_second": 0.898, "step": 500 }, { "epoch": 0.05, "eval_m2m_translation_accuracy": 0.5733300105250808, "eval_m2m_translation_loss": 1.8857421875, "eval_m2m_translation_runtime": 3725.0595, "eval_m2m_translation_samples_per_second": 14.208, "eval_m2m_translation_steps_per_second": 0.888, "step": 500 }, { "epoch": 0.05, "eval_xsum_accuracy": 0.5671291686677834, "eval_xsum_loss": 1.9658203125, "eval_xsum_runtime": 1011.0399, "eval_xsum_samples_per_second": 11.208, "eval_xsum_steps_per_second": 0.701, "step": 500 }, { "epoch": 0.05, "eval_cnn_dailymail_accuracy": 0.6565405592058271, "eval_cnn_dailymail_loss": 1.5810546875, "eval_cnn_dailymail_runtime": 1213.6108, "eval_cnn_dailymail_samples_per_second": 11.015, "eval_cnn_dailymail_steps_per_second": 0.689, "step": 500 }, { "epoch": 0.05, "eval_multi_news_accuracy": 0.5175576159426271, "eval_multi_news_loss": 2.33203125, "eval_multi_news_runtime": 512.3687, "eval_multi_news_samples_per_second": 10.973, "eval_multi_news_steps_per_second": 0.687, "step": 500 }, { "epoch": 0.05, "eval_scitldr_accuracy": 0.46241087965899574, "eval_scitldr_loss": 2.5234375, "eval_scitldr_runtime": 57.2333, "eval_scitldr_samples_per_second": 10.815, "eval_scitldr_steps_per_second": 0.681, "step": 500 }, { "epoch": 0.05, "eval_soda_accuracy": 0.7121364530942489, "eval_soda_loss": 1.21484375, "eval_soda_runtime": 82.3256, "eval_soda_samples_per_second": 12.159, "eval_soda_steps_per_second": 0.765, "step": 500 }, { "epoch": 0.05, "eval_joke_accuracy": 0.454450170320552, "eval_joke_loss": 2.62109375, "eval_joke_runtime": 6.9296, "eval_joke_samples_per_second": 10.967, "eval_joke_steps_per_second": 0.722, "step": 500 }, { "epoch": 0.05, "eval_gsm8k_accuracy": 0.8305793220560381, "eval_gsm8k_loss": 0.61328125, "eval_gsm8k_runtime": 113.5097, "eval_gsm8k_samples_per_second": 11.62, "eval_gsm8k_steps_per_second": 0.731, "step": 500 }, { "epoch": 0.05, "eval_samsum_accuracy": 0.6179719084733047, "eval_samsum_loss": 1.5537109375, "eval_samsum_runtime": 73.3198, "eval_samsum_samples_per_second": 11.157, "eval_samsum_steps_per_second": 0.709, "step": 500 }, { "epoch": 0.06, "learning_rate": 7.215636566090451e-06, "loss": 1.8655, "step": 510 }, { "epoch": 0.06, "learning_rate": 7.238212692653954e-06, "loss": 1.9284, "step": 520 }, { "epoch": 0.06, "learning_rate": 7.260357126756832e-06, "loss": 1.8522, "step": 530 }, { "epoch": 0.06, "learning_rate": 7.282086068443704e-06, "loss": 1.8869, "step": 540 }, { "epoch": 0.06, "learning_rate": 7.303414822624985e-06, "loss": 1.905, "step": 550 }, { "epoch": 0.06, "learning_rate": 7.324357863833543e-06, "loss": 1.8483, "step": 560 }, { "epoch": 0.06, "learning_rate": 7.344928895229384e-06, "loss": 1.8628, "step": 570 }, { "epoch": 0.06, "learning_rate": 7.365140902454744e-06, "loss": 1.8982, "step": 580 }, { "epoch": 0.06, "learning_rate": 7.385006202869703e-06, "loss": 1.834, "step": 590 }, { "epoch": 0.07, "learning_rate": 7.404536490635762e-06, "loss": 1.8488, "step": 600 }, { "epoch": 0.07, "learning_rate": 7.423742878060626e-06, "loss": 1.8585, "step": 610 }, { "epoch": 0.07, "learning_rate": 7.442635933570175e-06, "loss": 1.8292, "step": 620 }, { "epoch": 0.07, "learning_rate": 7.461225716632522e-06, "loss": 1.8399, "step": 630 }, { "epoch": 0.07, "learning_rate": 7.479521809923099e-06, "loss": 1.8775, "step": 640 }, { "epoch": 0.07, "learning_rate": 7.497533348988249e-06, "loss": 1.8593, "step": 650 }, { "epoch": 0.07, "learning_rate": 7.515269049637215e-06, "loss": 1.8557, "step": 660 }, { "epoch": 0.07, "learning_rate": 7.532737233268121e-06, "loss": 1.8427, "step": 670 }, { "epoch": 0.07, "learning_rate": 7.5499458503121685e-06, "loss": 1.859, "step": 680 }, { "epoch": 0.07, "learning_rate": 7.566902501961364e-06, "loss": 1.8111, "step": 690 }, { "epoch": 0.08, "learning_rate": 7.5836144603284295e-06, "loss": 1.8246, "step": 700 }, { "epoch": 0.08, "learning_rate": 7.600088687172717e-06, "loss": 1.8613, "step": 710 }, { "epoch": 0.08, "learning_rate": 7.616331851312801e-06, "loss": 1.8478, "step": 720 }, { "epoch": 0.08, "learning_rate": 7.632350344834765e-06, "loss": 1.8839, "step": 730 }, { "epoch": 0.08, "learning_rate": 7.648150298194777e-06, "loss": 1.8625, "step": 740 }, { "epoch": 0.08, "learning_rate": 7.663737594305231e-06, "loss": 1.8291, "step": 750 }, { "epoch": 0.08, "learning_rate": 7.679117881685476e-06, "loss": 1.836, "step": 760 }, { "epoch": 0.08, "learning_rate": 7.6942965867507e-06, "loss": 1.8102, "step": 770 }, { "epoch": 0.08, "learning_rate": 7.709278925305836e-06, "loss": 1.8353, "step": 780 }, { "epoch": 0.09, "learning_rate": 7.72406991330548e-06, "loss": 1.8713, "step": 790 }, { "epoch": 0.09, "learning_rate": 7.738674376935279e-06, "loss": 1.8314, "step": 800 }, { "epoch": 0.09, "learning_rate": 7.753096962065562e-06, "loss": 1.8553, "step": 810 }, { "epoch": 0.09, "learning_rate": 7.767342143123527e-06, "loss": 1.8318, "step": 820 }, { "epoch": 0.09, "learning_rate": 7.781414231426347e-06, "loss": 1.8401, "step": 830 }, { "epoch": 0.09, "learning_rate": 7.79531738301407e-06, "loss": 1.8108, "step": 840 }, { "epoch": 0.09, "learning_rate": 7.809055606017904e-06, "loss": 1.8563, "step": 850 }, { "epoch": 0.09, "learning_rate": 7.822632767596549e-06, "loss": 1.8408, "step": 860 }, { "epoch": 0.09, "learning_rate": 7.836052600470646e-06, "loss": 1.8096, "step": 870 }, { "epoch": 0.1, "learning_rate": 7.849318709082941e-06, "loss": 1.8264, "step": 880 }, { "epoch": 0.1, "learning_rate": 7.862434575409602e-06, "loss": 1.8902, "step": 890 }, { "epoch": 0.1, "learning_rate": 7.875403564446145e-06, "loss": 1.7953, "step": 900 }, { "epoch": 0.1, "learning_rate": 7.888228929389561e-06, "loss": 1.837, "step": 910 }, { "epoch": 0.1, "learning_rate": 7.900913816536647e-06, "loss": 1.7704, "step": 920 }, { "epoch": 0.1, "learning_rate": 7.913461269916965e-06, "loss": 1.8251, "step": 930 }, { "epoch": 0.1, "learning_rate": 7.925874235677506e-06, "loss": 1.8294, "step": 940 }, { "epoch": 0.1, "learning_rate": 7.938155566234842e-06, "loss": 1.8306, "step": 950 }, { "epoch": 0.1, "learning_rate": 7.950308024209451e-06, "loss": 1.7889, "step": 960 }, { "epoch": 0.11, "learning_rate": 7.962334286155715e-06, "loss": 1.7922, "step": 970 }, { "epoch": 0.11, "learning_rate": 7.974236946100272e-06, "loss": 1.7679, "step": 980 }, { "epoch": 0.11, "learning_rate": 7.986018518900343e-06, "loss": 1.8068, "step": 990 }, { "epoch": 0.11, "learning_rate": 7.997681443432989e-06, "loss": 1.8467, "step": 1000 }, { "epoch": 0.11, "eval_webgpt_accuracy": 0.4853852488269729, "eval_webgpt_loss": 2.388671875, "eval_webgpt_runtime": 337.66, "eval_webgpt_samples_per_second": 11.597, "eval_webgpt_steps_per_second": 0.726, "step": 1000 }, { "epoch": 0.11, "eval_prompt_dialogue_accuracy": 0.5656945584798752, "eval_prompt_dialogue_loss": 1.7978515625, "eval_prompt_dialogue_runtime": 810.7166, "eval_prompt_dialogue_samples_per_second": 12.716, "eval_prompt_dialogue_steps_per_second": 0.796, "step": 1000 }, { "epoch": 0.11, "eval_squad_v2_accuracy": 0.929180907765482, "eval_squad_v2_loss": NaN, "eval_squad_v2_runtime": 942.6897, "eval_squad_v2_samples_per_second": 12.595, "eval_squad_v2_steps_per_second": 0.788, "step": 1000 }, { "epoch": 0.11, "eval_adversarial_qa_accuracy": 0.8106740280653324, "eval_adversarial_qa_loss": 0.8291015625, "eval_adversarial_qa_runtime": 239.9039, "eval_adversarial_qa_samples_per_second": 12.505, "eval_adversarial_qa_steps_per_second": 0.784, "step": 1000 }, { "epoch": 0.11, "eval_trivia_qa_nocontext_accuracy": 0.46837345154352583, "eval_trivia_qa_nocontext_loss": 3.052734375, "eval_trivia_qa_nocontext_runtime": 1248.9886, "eval_trivia_qa_nocontext_samples_per_second": 14.367, "eval_trivia_qa_nocontext_steps_per_second": 0.898, "step": 1000 }, { "epoch": 0.11, "eval_m2m_translation_accuracy": 0.6128791362753888, "eval_m2m_translation_loss": 1.6904296875, "eval_m2m_translation_runtime": 3724.3046, "eval_m2m_translation_samples_per_second": 14.21, "eval_m2m_translation_steps_per_second": 0.888, "step": 1000 }, { "epoch": 0.11, "eval_xsum_accuracy": 0.576201200101912, "eval_xsum_loss": 1.90625, "eval_xsum_runtime": 1013.837, "eval_xsum_samples_per_second": 11.177, "eval_xsum_steps_per_second": 0.699, "step": 1000 }, { "epoch": 0.11, "eval_cnn_dailymail_accuracy": 0.6590085257947359, "eval_cnn_dailymail_loss": 1.5537109375, "eval_cnn_dailymail_runtime": 1212.8269, "eval_cnn_dailymail_samples_per_second": 11.022, "eval_cnn_dailymail_steps_per_second": 0.689, "step": 1000 }, { "epoch": 0.11, "eval_multi_news_accuracy": 0.5213214574389622, "eval_multi_news_loss": 2.302734375, "eval_multi_news_runtime": 514.4889, "eval_multi_news_samples_per_second": 10.927, "eval_multi_news_steps_per_second": 0.684, "step": 1000 }, { "epoch": 0.11, "eval_scitldr_accuracy": 0.4667123357182727, "eval_scitldr_loss": 2.50390625, "eval_scitldr_runtime": 55.8523, "eval_scitldr_samples_per_second": 11.083, "eval_scitldr_steps_per_second": 0.698, "step": 1000 }, { "epoch": 0.11, "eval_soda_accuracy": 0.7221954952984911, "eval_soda_loss": 1.1513671875, "eval_soda_runtime": 82.1743, "eval_soda_samples_per_second": 12.181, "eval_soda_steps_per_second": 0.767, "step": 1000 }, { "epoch": 0.11, "eval_joke_accuracy": 0.45549829679447984, "eval_joke_loss": 2.5625, "eval_joke_runtime": 7.1872, "eval_joke_samples_per_second": 10.574, "eval_joke_steps_per_second": 0.696, "step": 1000 }, { "epoch": 0.11, "eval_gsm8k_accuracy": 0.836947292796029, "eval_gsm8k_loss": 0.591796875, "eval_gsm8k_runtime": 112.7859, "eval_gsm8k_samples_per_second": 11.695, "eval_gsm8k_steps_per_second": 0.736, "step": 1000 }, { "epoch": 0.11, "eval_samsum_accuracy": 0.6263182026593306, "eval_samsum_loss": 1.5126953125, "eval_samsum_runtime": 73.6542, "eval_samsum_samples_per_second": 11.106, "eval_samsum_steps_per_second": 0.706, "step": 1000 }, { "epoch": 0.11, "learning_rate": 7.997895686156621e-06, "loss": 1.8114, "step": 1010 }, { "epoch": 0.11, "learning_rate": 7.994889523523223e-06, "loss": 1.857, "step": 1020 }, { "epoch": 0.11, "learning_rate": 7.991883360889824e-06, "loss": 1.8195, "step": 1030 }, { "epoch": 0.11, "learning_rate": 7.988877198256426e-06, "loss": 1.8101, "step": 1040 }, { "epoch": 0.11, "learning_rate": 7.985871035623028e-06, "loss": 1.8169, "step": 1050 }, { "epoch": 0.12, "learning_rate": 7.982864872989627e-06, "loss": 1.8414, "step": 1060 }, { "epoch": 0.12, "learning_rate": 7.979858710356229e-06, "loss": 1.825, "step": 1070 }, { "epoch": 0.12, "learning_rate": 7.97685254772283e-06, "loss": 1.7882, "step": 1080 }, { "epoch": 0.12, "learning_rate": 7.973846385089432e-06, "loss": 1.7857, "step": 1090 }, { "epoch": 0.12, "learning_rate": 7.970840222456035e-06, "loss": 1.8229, "step": 1100 }, { "epoch": 0.12, "learning_rate": 7.967834059822637e-06, "loss": 1.8152, "step": 1110 }, { "epoch": 0.12, "learning_rate": 7.964827897189238e-06, "loss": 1.7865, "step": 1120 }, { "epoch": 0.12, "learning_rate": 7.96182173455584e-06, "loss": 1.8038, "step": 1130 }, { "epoch": 0.12, "learning_rate": 7.958815571922442e-06, "loss": 1.796, "step": 1140 }, { "epoch": 0.12, "learning_rate": 7.955809409289041e-06, "loss": 1.7737, "step": 1150 }, { "epoch": 0.13, "learning_rate": 7.952803246655643e-06, "loss": 1.7958, "step": 1160 }, { "epoch": 0.13, "learning_rate": 7.949797084022244e-06, "loss": 1.7981, "step": 1170 }, { "epoch": 0.13, "learning_rate": 7.946790921388846e-06, "loss": 1.8061, "step": 1180 }, { "epoch": 0.13, "learning_rate": 7.943784758755448e-06, "loss": 1.8395, "step": 1190 }, { "epoch": 0.13, "learning_rate": 7.940778596122049e-06, "loss": 1.7803, "step": 1200 }, { "epoch": 0.13, "learning_rate": 7.937772433488652e-06, "loss": 1.7906, "step": 1210 }, { "epoch": 0.13, "learning_rate": 7.934766270855254e-06, "loss": 1.8076, "step": 1220 }, { "epoch": 0.13, "learning_rate": 7.931760108221855e-06, "loss": 1.7971, "step": 1230 }, { "epoch": 0.13, "learning_rate": 7.928753945588455e-06, "loss": 1.8292, "step": 1240 }, { "epoch": 0.14, "learning_rate": 7.925747782955057e-06, "loss": 1.7876, "step": 1250 }, { "epoch": 0.14, "learning_rate": 7.922741620321658e-06, "loss": 1.7499, "step": 1260 }, { "epoch": 0.14, "learning_rate": 7.91973545768826e-06, "loss": 1.8209, "step": 1270 }, { "epoch": 0.14, "learning_rate": 7.916729295054861e-06, "loss": 1.7971, "step": 1280 }, { "epoch": 0.14, "learning_rate": 7.913723132421463e-06, "loss": 1.8168, "step": 1290 }, { "epoch": 0.14, "learning_rate": 7.910716969788065e-06, "loss": 1.7771, "step": 1300 }, { "epoch": 0.14, "learning_rate": 7.907710807154668e-06, "loss": 1.7611, "step": 1310 }, { "epoch": 0.14, "learning_rate": 7.90470464452127e-06, "loss": 1.7673, "step": 1320 }, { "epoch": 0.14, "learning_rate": 7.90169848188787e-06, "loss": 1.8694, "step": 1330 }, { "epoch": 0.15, "learning_rate": 7.89869231925447e-06, "loss": 1.7543, "step": 1340 }, { "epoch": 0.15, "learning_rate": 7.895686156621072e-06, "loss": 1.759, "step": 1350 }, { "epoch": 0.15, "learning_rate": 7.892679993987674e-06, "loss": 1.7929, "step": 1360 }, { "epoch": 0.15, "learning_rate": 7.889673831354275e-06, "loss": 1.8223, "step": 1370 }, { "epoch": 0.15, "learning_rate": 7.886667668720877e-06, "loss": 1.7829, "step": 1380 }, { "epoch": 0.15, "learning_rate": 7.883661506087479e-06, "loss": 1.8088, "step": 1390 }, { "epoch": 0.15, "learning_rate": 7.88065534345408e-06, "loss": 1.8012, "step": 1400 }, { "epoch": 0.15, "learning_rate": 7.877649180820682e-06, "loss": 1.7337, "step": 1410 }, { "epoch": 0.15, "learning_rate": 7.874643018187283e-06, "loss": 1.7848, "step": 1420 }, { "epoch": 0.16, "learning_rate": 7.871636855553885e-06, "loss": 1.7741, "step": 1430 }, { "epoch": 0.16, "learning_rate": 7.868630692920486e-06, "loss": 1.8111, "step": 1440 }, { "epoch": 0.16, "learning_rate": 7.865624530287088e-06, "loss": 1.7663, "step": 1450 }, { "epoch": 0.16, "learning_rate": 7.86261836765369e-06, "loss": 1.7635, "step": 1460 }, { "epoch": 0.16, "learning_rate": 7.859612205020291e-06, "loss": 1.7613, "step": 1470 }, { "epoch": 0.16, "learning_rate": 7.856606042386892e-06, "loss": 1.7537, "step": 1480 }, { "epoch": 0.16, "learning_rate": 7.853599879753494e-06, "loss": 1.8094, "step": 1490 }, { "epoch": 0.16, "learning_rate": 7.850593717120096e-06, "loss": 1.7487, "step": 1500 }, { "epoch": 0.16, "eval_webgpt_accuracy": 0.486666038374247, "eval_webgpt_loss": 2.384765625, "eval_webgpt_runtime": 336.8175, "eval_webgpt_samples_per_second": 11.626, "eval_webgpt_steps_per_second": 0.727, "step": 1500 }, { "epoch": 0.16, "eval_prompt_dialogue_accuracy": 0.5696171266680501, "eval_prompt_dialogue_loss": 1.7705078125, "eval_prompt_dialogue_runtime": 809.6613, "eval_prompt_dialogue_samples_per_second": 12.732, "eval_prompt_dialogue_steps_per_second": 0.797, "step": 1500 }, { "epoch": 0.16, "eval_squad_v2_accuracy": 0.9273763713281866, "eval_squad_v2_loss": NaN, "eval_squad_v2_runtime": 941.2422, "eval_squad_v2_samples_per_second": 12.614, "eval_squad_v2_steps_per_second": 0.789, "step": 1500 }, { "epoch": 0.16, "eval_adversarial_qa_accuracy": 0.8228088336783989, "eval_adversarial_qa_loss": 0.76416015625, "eval_adversarial_qa_runtime": 238.4374, "eval_adversarial_qa_samples_per_second": 12.582, "eval_adversarial_qa_steps_per_second": 0.788, "step": 1500 }, { "epoch": 0.16, "eval_trivia_qa_nocontext_accuracy": 0.47413712205101255, "eval_trivia_qa_nocontext_loss": 2.9921875, "eval_trivia_qa_nocontext_runtime": 1248.2736, "eval_trivia_qa_nocontext_samples_per_second": 14.375, "eval_trivia_qa_nocontext_steps_per_second": 0.899, "step": 1500 }, { "epoch": 0.16, "eval_m2m_translation_accuracy": 0.6384202273383193, "eval_m2m_translation_loss": 1.5703125, "eval_m2m_translation_runtime": 3725.8719, "eval_m2m_translation_samples_per_second": 14.204, "eval_m2m_translation_steps_per_second": 0.888, "step": 1500 }, { "epoch": 0.16, "eval_xsum_accuracy": 0.5815857676257413, "eval_xsum_loss": 1.87109375, "eval_xsum_runtime": 1009.1324, "eval_xsum_samples_per_second": 11.229, "eval_xsum_steps_per_second": 0.703, "step": 1500 }, { "epoch": 0.16, "eval_cnn_dailymail_accuracy": 0.6627513117154793, "eval_cnn_dailymail_loss": 1.537109375, "eval_cnn_dailymail_runtime": 1211.2046, "eval_cnn_dailymail_samples_per_second": 11.037, "eval_cnn_dailymail_steps_per_second": 0.69, "step": 1500 }, { "epoch": 0.16, "eval_multi_news_accuracy": 0.5242124214780086, "eval_multi_news_loss": 2.287109375, "eval_multi_news_runtime": 513.1245, "eval_multi_news_samples_per_second": 10.956, "eval_multi_news_steps_per_second": 0.686, "step": 1500 }, { "epoch": 0.16, "eval_scitldr_accuracy": 0.46804191723122984, "eval_scitldr_loss": 2.4921875, "eval_scitldr_runtime": 56.2815, "eval_scitldr_samples_per_second": 10.998, "eval_scitldr_steps_per_second": 0.693, "step": 1500 }, { "epoch": 0.16, "eval_soda_accuracy": 0.7266564618412421, "eval_soda_loss": 1.12890625, "eval_soda_runtime": 82.0556, "eval_soda_samples_per_second": 12.199, "eval_soda_steps_per_second": 0.768, "step": 1500 }, { "epoch": 0.16, "eval_joke_accuracy": 0.45995283430867323, "eval_joke_loss": 2.5, "eval_joke_runtime": 7.0053, "eval_joke_samples_per_second": 10.849, "eval_joke_steps_per_second": 0.714, "step": 1500 }, { "epoch": 0.16, "eval_gsm8k_accuracy": 0.8421233100385344, "eval_gsm8k_loss": 0.57177734375, "eval_gsm8k_runtime": 113.8436, "eval_gsm8k_samples_per_second": 11.586, "eval_gsm8k_steps_per_second": 0.729, "step": 1500 }, { "epoch": 0.16, "eval_samsum_accuracy": 0.6305018339446482, "eval_samsum_loss": 1.490234375, "eval_samsum_runtime": 72.6647, "eval_samsum_samples_per_second": 11.257, "eval_samsum_steps_per_second": 0.716, "step": 1500 }, { "epoch": 0.16, "learning_rate": 7.847587554486697e-06, "loss": 1.7693, "step": 1510 }, { "epoch": 0.17, "learning_rate": 7.844882008116638e-06, "loss": 1.7719, "step": 1520 }, { "epoch": 0.17, "learning_rate": 7.84187584548324e-06, "loss": 1.7777, "step": 1530 }, { "epoch": 0.17, "learning_rate": 7.838869682849841e-06, "loss": 1.7922, "step": 1540 }, { "epoch": 0.17, "learning_rate": 7.835863520216443e-06, "loss": 1.7688, "step": 1550 }, { "epoch": 0.17, "learning_rate": 7.832857357583046e-06, "loss": 1.7848, "step": 1560 }, { "epoch": 0.17, "learning_rate": 7.829851194949648e-06, "loss": 1.7795, "step": 1570 }, { "epoch": 0.17, "learning_rate": 7.826845032316247e-06, "loss": 1.7826, "step": 1580 }, { "epoch": 0.17, "learning_rate": 7.823838869682849e-06, "loss": 1.7511, "step": 1590 }, { "epoch": 0.17, "learning_rate": 7.82083270704945e-06, "loss": 1.7502, "step": 1600 }, { "epoch": 0.17, "learning_rate": 7.817826544416052e-06, "loss": 1.7384, "step": 1610 }, { "epoch": 0.18, "learning_rate": 7.814820381782654e-06, "loss": 1.773, "step": 1620 }, { "epoch": 0.18, "learning_rate": 7.811814219149255e-06, "loss": 1.785, "step": 1630 }, { "epoch": 0.18, "learning_rate": 7.808808056515857e-06, "loss": 1.7407, "step": 1640 }, { "epoch": 0.18, "learning_rate": 7.805801893882458e-06, "loss": 1.8115, "step": 1650 }, { "epoch": 0.18, "learning_rate": 7.80279573124906e-06, "loss": 1.8207, "step": 1660 }, { "epoch": 0.18, "learning_rate": 7.799789568615661e-06, "loss": 1.7618, "step": 1670 }, { "epoch": 0.18, "learning_rate": 7.796783405982263e-06, "loss": 1.7725, "step": 1680 }, { "epoch": 0.18, "learning_rate": 7.793777243348864e-06, "loss": 1.7618, "step": 1690 }, { "epoch": 0.18, "learning_rate": 7.790771080715466e-06, "loss": 1.7567, "step": 1700 }, { "epoch": 0.19, "learning_rate": 7.787764918082068e-06, "loss": 1.7704, "step": 1710 }, { "epoch": 0.19, "learning_rate": 7.784758755448669e-06, "loss": 1.7426, "step": 1720 }, { "epoch": 0.19, "learning_rate": 7.78175259281527e-06, "loss": 1.7373, "step": 1730 }, { "epoch": 0.19, "learning_rate": 7.778746430181872e-06, "loss": 1.7966, "step": 1740 }, { "epoch": 0.19, "learning_rate": 7.775740267548474e-06, "loss": 1.773, "step": 1750 }, { "epoch": 0.19, "learning_rate": 7.772734104915075e-06, "loss": 1.7664, "step": 1760 }, { "epoch": 0.19, "learning_rate": 7.769727942281677e-06, "loss": 1.7518, "step": 1770 }, { "epoch": 0.19, "learning_rate": 7.766721779648278e-06, "loss": 1.7743, "step": 1780 }, { "epoch": 0.19, "learning_rate": 7.76371561701488e-06, "loss": 1.7882, "step": 1790 }, { "epoch": 0.2, "learning_rate": 7.760709454381481e-06, "loss": 1.7534, "step": 1800 }, { "epoch": 0.2, "learning_rate": 7.757703291748083e-06, "loss": 1.7761, "step": 1810 }, { "epoch": 0.2, "learning_rate": 7.754697129114685e-06, "loss": 1.7951, "step": 1820 }, { "epoch": 0.2, "learning_rate": 7.751690966481286e-06, "loss": 1.7287, "step": 1830 }, { "epoch": 0.2, "learning_rate": 7.748684803847888e-06, "loss": 1.7674, "step": 1840 }, { "epoch": 0.2, "learning_rate": 7.74567864121449e-06, "loss": 1.7664, "step": 1850 }, { "epoch": 0.2, "learning_rate": 7.74267247858109e-06, "loss": 1.7569, "step": 1860 }, { "epoch": 0.2, "learning_rate": 7.739666315947692e-06, "loss": 1.8026, "step": 1870 }, { "epoch": 0.2, "learning_rate": 7.736960769577633e-06, "loss": 1.7262, "step": 1880 }, { "epoch": 0.21, "learning_rate": 7.733954606944235e-06, "loss": 1.7266, "step": 1890 }, { "epoch": 0.21, "learning_rate": 7.730948444310836e-06, "loss": 1.7476, "step": 1900 }, { "epoch": 0.21, "learning_rate": 7.727942281677438e-06, "loss": 1.7544, "step": 1910 }, { "epoch": 0.21, "learning_rate": 7.72493611904404e-06, "loss": 1.7859, "step": 1920 }, { "epoch": 0.21, "learning_rate": 7.721929956410641e-06, "loss": 1.7386, "step": 1930 }, { "epoch": 0.21, "learning_rate": 7.718923793777243e-06, "loss": 1.7012, "step": 1940 }, { "epoch": 0.21, "learning_rate": 7.715917631143844e-06, "loss": 1.7736, "step": 1950 }, { "epoch": 0.21, "learning_rate": 7.712911468510446e-06, "loss": 1.7398, "step": 1960 }, { "epoch": 0.21, "learning_rate": 7.709905305877047e-06, "loss": 1.8026, "step": 1970 }, { "epoch": 0.22, "learning_rate": 7.706899143243649e-06, "loss": 1.798, "step": 1980 }, { "epoch": 0.22, "learning_rate": 7.70389298061025e-06, "loss": 1.802, "step": 1990 }, { "epoch": 0.22, "learning_rate": 7.700886817976852e-06, "loss": 1.7447, "step": 2000 }, { "epoch": 0.22, "eval_webgpt_accuracy": 0.48722841158921415, "eval_webgpt_loss": 2.37890625, "eval_webgpt_runtime": 335.1847, "eval_webgpt_samples_per_second": 11.683, "eval_webgpt_steps_per_second": 0.731, "step": 2000 }, { "epoch": 0.22, "eval_prompt_dialogue_accuracy": 0.5729272869993594, "eval_prompt_dialogue_loss": 1.7509765625, "eval_prompt_dialogue_runtime": 808.5089, "eval_prompt_dialogue_samples_per_second": 12.751, "eval_prompt_dialogue_steps_per_second": 0.798, "step": 2000 }, { "epoch": 0.22, "eval_squad_v2_accuracy": 0.9350127871125027, "eval_squad_v2_loss": NaN, "eval_squad_v2_runtime": 937.9399, "eval_squad_v2_samples_per_second": 12.659, "eval_squad_v2_steps_per_second": 0.792, "step": 2000 }, { "epoch": 0.22, "eval_adversarial_qa_accuracy": 0.8308028525419829, "eval_adversarial_qa_loss": 0.7265625, "eval_adversarial_qa_runtime": 238.5652, "eval_adversarial_qa_samples_per_second": 12.575, "eval_adversarial_qa_steps_per_second": 0.788, "step": 2000 }, { "epoch": 0.22, "eval_trivia_qa_nocontext_accuracy": 0.47783426824006153, "eval_trivia_qa_nocontext_loss": 2.9609375, "eval_trivia_qa_nocontext_runtime": 1249.121, "eval_trivia_qa_nocontext_samples_per_second": 14.365, "eval_trivia_qa_nocontext_steps_per_second": 0.898, "step": 2000 }, { "epoch": 0.22, "eval_m2m_translation_accuracy": 0.6576231472988909, "eval_m2m_translation_loss": 1.4755859375, "eval_m2m_translation_runtime": 3725.504, "eval_m2m_translation_samples_per_second": 14.206, "eval_m2m_translation_steps_per_second": 0.888, "step": 2000 }, { "epoch": 0.22, "eval_xsum_accuracy": 0.5862788967827711, "eval_xsum_loss": 1.8447265625, "eval_xsum_runtime": 1007.4241, "eval_xsum_samples_per_second": 11.248, "eval_xsum_steps_per_second": 0.704, "step": 2000 }, { "epoch": 0.22, "eval_cnn_dailymail_accuracy": 0.6635118400214548, "eval_cnn_dailymail_loss": 1.5224609375, "eval_cnn_dailymail_runtime": 1207.7216, "eval_cnn_dailymail_samples_per_second": 11.069, "eval_cnn_dailymail_steps_per_second": 0.692, "step": 2000 }, { "epoch": 0.22, "eval_multi_news_accuracy": 0.5254140019732162, "eval_multi_news_loss": 2.271484375, "eval_multi_news_runtime": 511.4499, "eval_multi_news_samples_per_second": 10.992, "eval_multi_news_steps_per_second": 0.688, "step": 2000 }, { "epoch": 0.22, "eval_scitldr_accuracy": 0.46584969807682547, "eval_scitldr_loss": 2.50390625, "eval_scitldr_runtime": 56.1361, "eval_scitldr_samples_per_second": 11.027, "eval_scitldr_steps_per_second": 0.695, "step": 2000 }, { "epoch": 0.22, "eval_soda_accuracy": 0.7280997157227204, "eval_soda_loss": 1.1171875, "eval_soda_runtime": 81.9632, "eval_soda_samples_per_second": 12.213, "eval_soda_steps_per_second": 0.769, "step": 2000 }, { "epoch": 0.22, "eval_joke_accuracy": 0.4692986287011966, "eval_joke_loss": 2.46484375, "eval_joke_runtime": 6.7476, "eval_joke_samples_per_second": 11.263, "eval_joke_steps_per_second": 0.741, "step": 2000 }, { "epoch": 0.22, "eval_gsm8k_accuracy": 0.8437234667885833, "eval_gsm8k_loss": 0.5634765625, "eval_gsm8k_runtime": 113.7256, "eval_gsm8k_samples_per_second": 11.598, "eval_gsm8k_steps_per_second": 0.73, "step": 2000 }, { "epoch": 0.22, "eval_samsum_accuracy": 0.6325025008336113, "eval_samsum_loss": 1.4638671875, "eval_samsum_runtime": 72.3462, "eval_samsum_samples_per_second": 11.307, "eval_samsum_steps_per_second": 0.719, "step": 2000 } ], "max_steps": 27612, "num_train_epochs": 3, "total_flos": 428521886187520.0, "trial_name": null, "trial_params": null }