{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.22919483853223627, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.4084967333570947e-06, "loss": 2.2507, "step": 10 }, { "epoch": 0.0, "learning_rate": 2.0507482022971233e-06, "loss": 1.9542, "step": 20 }, { "epoch": 0.01, "learning_rate": 2.385606273598312e-06, "loss": 1.8446, "step": 30 }, { "epoch": 0.01, "learning_rate": 2.6136695401116585e-06, "loss": 1.831, "step": 40 }, { "epoch": 0.01, "learning_rate": 2.7868297632261957e-06, "loss": 1.8121, "step": 50 }, { "epoch": 0.01, "learning_rate": 2.926458092787486e-06, "loss": 1.7884, "step": 60 }, { "epoch": 0.02, "learning_rate": 3.0434580045013773e-06, "loss": 1.755, "step": 70 }, { "epoch": 0.02, "learning_rate": 3.1441512086208035e-06, "loss": 1.7662, "step": 80 }, { "epoch": 0.02, "learning_rate": 3.232532087697698e-06, "loss": 1.7246, "step": 90 }, { "epoch": 0.02, "learning_rate": 3.3112862237770753e-06, "loss": 1.7563, "step": 100 }, { "epoch": 0.03, "learning_rate": 3.3823062961420163e-06, "loss": 1.7531, "step": 110 }, { "epoch": 0.03, "learning_rate": 3.446976436243603e-06, "loss": 1.7334, "step": 120 }, { "epoch": 0.03, "learning_rate": 3.506339534926595e-06, "loss": 1.7231, "step": 130 }, { "epoch": 0.03, "learning_rate": 3.5612009452606784e-06, "loss": 1.7151, "step": 140 }, { "epoch": 0.03, "learning_rate": 3.612195557913627e-06, "loss": 1.7218, "step": 150 }, { "epoch": 0.04, "learning_rate": 3.65983275401539e-06, "loss": 1.7144, "step": 160 }, { "epoch": 0.04, "learning_rate": 3.7045274519126395e-06, "loss": 1.7195, "step": 170 }, { "epoch": 0.04, "learning_rate": 3.7466221106030114e-06, "loss": 1.6989, "step": 180 }, { "epoch": 0.04, "learning_rate": 3.786402677560832e-06, "loss": 1.7034, "step": 190 }, { "epoch": 0.05, "learning_rate": 3.824110376935989e-06, "loss": 1.7049, "step": 200 }, { "epoch": 0.05, "learning_rate": 3.8599505757615295e-06, "loss": 1.7457, "step": 210 }, { "epoch": 0.05, "learning_rate": 3.894099556414216e-06, "loss": 1.7092, "step": 220 }, { "epoch": 0.05, "learning_rate": 3.9267097619885385e-06, "loss": 1.7283, "step": 230 }, { "epoch": 0.05, "learning_rate": 3.95791391001684e-06, "loss": 1.6915, "step": 240 }, { "epoch": 0.06, "learning_rate": 3.987828255432777e-06, "loss": 1.6902, "step": 250 }, { "epoch": 0.06, "learning_rate": 4.016555205552159e-06, "loss": 1.7059, "step": 260 }, { "epoch": 0.06, "learning_rate": 4.044185435607626e-06, "loss": 1.7044, "step": 270 }, { "epoch": 0.06, "learning_rate": 4.070799615107415e-06, "loss": 1.6984, "step": 280 }, { "epoch": 0.07, "learning_rate": 4.096469827889988e-06, "loss": 1.7203, "step": 290 }, { "epoch": 0.07, "learning_rate": 4.121260748862021e-06, "loss": 1.7046, "step": 300 }, { "epoch": 0.07, "learning_rate": 4.145230625795312e-06, "loss": 1.6732, "step": 310 }, { "epoch": 0.07, "learning_rate": 4.1684321036962525e-06, "loss": 1.6948, "step": 320 }, { "epoch": 0.08, "learning_rate": 4.190912921100477e-06, "loss": 1.656, "step": 330 }, { "epoch": 0.08, "learning_rate": 4.212716501452232e-06, "loss": 1.6729, "step": 340 }, { "epoch": 0.08, "learning_rate": 4.233882457984791e-06, "loss": 1.7092, "step": 350 }, { "epoch": 0.08, "learning_rate": 4.2544470268536555e-06, "loss": 1.6883, "step": 360 }, { "epoch": 0.08, "learning_rate": 4.27444344042015e-06, "loss": 1.6877, "step": 370 }, { "epoch": 0.09, "learning_rate": 4.293902250342989e-06, "loss": 1.6774, "step": 380 }, { "epoch": 0.09, "learning_rate": 4.312851608364853e-06, "loss": 1.6957, "step": 390 }, { "epoch": 0.09, "learning_rate": 4.3313175112718595e-06, "loss": 1.6848, "step": 400 }, { "epoch": 0.09, "learning_rate": 4.3493240153753665e-06, "loss": 1.682, "step": 410 }, { "epoch": 0.1, "learning_rate": 4.366893424956263e-06, "loss": 1.6724, "step": 420 }, { "epoch": 0.1, "learning_rate": 4.38404645837504e-06, "loss": 1.7079, "step": 430 }, { "epoch": 0.1, "learning_rate": 4.400802394950703e-06, "loss": 1.6605, "step": 440 }, { "epoch": 0.1, "learning_rate": 4.4171792052198945e-06, "loss": 1.6822, "step": 450 }, { "epoch": 0.11, "learning_rate": 4.433193666783084e-06, "loss": 1.6731, "step": 460 }, { "epoch": 0.11, "learning_rate": 4.448861467610187e-06, "loss": 1.6648, "step": 470 }, { "epoch": 0.11, "learning_rate": 4.4641972984001906e-06, "loss": 1.6781, "step": 480 }, { "epoch": 0.11, "learning_rate": 4.479214935357724e-06, "loss": 1.6752, "step": 490 }, { "epoch": 0.11, "learning_rate": 4.493927314555554e-06, "loss": 1.6754, "step": 500 }, { "epoch": 0.11, "eval_webgpt_accuracy": 0.5055517960817275, "eval_webgpt_loss": 2.15625, "eval_webgpt_runtime": 39.0916, "eval_webgpt_samples_per_second": 100.175, "eval_webgpt_steps_per_second": 1.253, "step": 500 }, { "epoch": 0.11, "eval_prompt_dialogue_accuracy": 0.6254543673617606, "eval_prompt_dialogue_loss": 1.357421875, "eval_prompt_dialogue_runtime": 71.3081, "eval_prompt_dialogue_samples_per_second": 144.57, "eval_prompt_dialogue_steps_per_second": 1.809, "step": 500 }, { "epoch": 0.11, "eval_adversarial_qa_accuracy": 0.8029728725380899, "eval_adversarial_qa_loss": 0.70654296875, "eval_adversarial_qa_runtime": 20.7874, "eval_adversarial_qa_samples_per_second": 144.318, "eval_adversarial_qa_steps_per_second": 1.828, "step": 500 }, { "epoch": 0.11, "eval_xsum_accuracy": 0.632906181388279, "eval_xsum_loss": 1.3935546875, "eval_xsum_runtime": 122.5752, "eval_xsum_samples_per_second": 92.449, "eval_xsum_steps_per_second": 1.158, "step": 500 }, { "epoch": 0.11, "eval_cnn_dailymail_accuracy": 0.7001129736496595, "eval_cnn_dailymail_loss": NaN, "eval_cnn_dailymail_runtime": 144.5118, "eval_cnn_dailymail_samples_per_second": 92.505, "eval_cnn_dailymail_steps_per_second": 1.163, "step": 500 }, { "epoch": 0.11, "eval_multi_news_accuracy": 0.5801641857474902, "eval_multi_news_loss": NaN, "eval_multi_news_runtime": 62.7124, "eval_multi_news_samples_per_second": 89.647, "eval_multi_news_steps_per_second": 1.132, "step": 500 }, { "epoch": 0.11, "eval_scitldr_accuracy": 0.4978125, "eval_scitldr_loss": NaN, "eval_scitldr_runtime": 8.0438, "eval_scitldr_samples_per_second": 76.954, "eval_scitldr_steps_per_second": 0.995, "step": 500 }, { "epoch": 0.11, "eval_joke_accuracy": 0.5093821076573162, "eval_joke_loss": 2.1171875, "eval_joke_runtime": 0.9767, "eval_joke_samples_per_second": 77.813, "eval_joke_steps_per_second": 1.024, "step": 500 }, { "epoch": 0.11, "eval_gsm8k_accuracy": 0.7808137739345274, "eval_gsm8k_loss": 0.8134765625, "eval_gsm8k_runtime": 9.4148, "eval_gsm8k_samples_per_second": 140.099, "eval_gsm8k_steps_per_second": 1.806, "step": 500 }, { "epoch": 0.11, "eval_dive_mt_accuracy": 0.7367253212240054, "eval_dive_mt_loss": 1.04296875, "eval_dive_mt_runtime": 10.0465, "eval_dive_mt_samples_per_second": 128.403, "eval_dive_mt_steps_per_second": 1.692, "step": 500 }, { "epoch": 0.11, "eval_math_qa_accuracy": 0.608876541257212, "eval_math_qa_loss": 1.6689453125, "eval_math_qa_runtime": 30.5447, "eval_math_qa_samples_per_second": 146.507, "eval_math_qa_steps_per_second": 1.833, "step": 500 }, { "epoch": 0.11, "eval_essay_instruction_accuracy": 0.6053833226455565, "eval_essay_instruction_loss": 1.8876953125, "eval_essay_instruction_runtime": 8.3301, "eval_essay_instruction_samples_per_second": 49.579, "eval_essay_instruction_steps_per_second": 0.72, "step": 500 }, { "epoch": 0.11, "eval_tldr_news_accuracy": 0.6061431123968348, "eval_tldr_news_loss": 1.697265625, "eval_tldr_news_runtime": 5.1098, "eval_tldr_news_samples_per_second": 155.389, "eval_tldr_news_steps_per_second": 1.957, "step": 500 }, { "epoch": 0.11, "eval_reddit_eli5_accuracy": 0.46120351563185963, "eval_reddit_eli5_loss": 2.423828125, "eval_reddit_eli5_runtime": 107.5158, "eval_reddit_eli5_samples_per_second": 91.261, "eval_reddit_eli5_steps_per_second": 1.144, "step": 500 }, { "epoch": 0.11, "eval_reddit_asks_accuracy": 0.4690436591507088, "eval_reddit_asks_loss": 2.412109375, "eval_reddit_asks_runtime": 32.2161, "eval_reddit_asks_samples_per_second": 70.803, "eval_reddit_asks_steps_per_second": 0.9, "step": 500 }, { "epoch": 0.11, "eval_reddit_askh_accuracy": 0.466922516495131, "eval_reddit_askh_loss": 2.513671875, "eval_reddit_askh_runtime": 61.0062, "eval_reddit_askh_samples_per_second": 80.336, "eval_reddit_askh_steps_per_second": 1.016, "step": 500 }, { "epoch": 0.11, "eval_wmt2019_zh-en_accuracy": 0.6711313468964325, "eval_wmt2019_zh-en_loss": 1.44140625, "eval_wmt2019_zh-en_runtime": 27.1705, "eval_wmt2019_zh-en_samples_per_second": 146.519, "eval_wmt2019_zh-en_steps_per_second": 1.84, "step": 500 }, { "epoch": 0.11, "eval_wmt2019_fr-de_accuracy": 0.751335577309082, "eval_wmt2019_fr-de_loss": 0.9892578125, "eval_wmt2019_fr-de_runtime": 9.8591, "eval_wmt2019_fr-de_samples_per_second": 153.361, "eval_wmt2019_fr-de_steps_per_second": 1.927, "step": 500 }, { "epoch": 0.11, "eval_wmt2019_ru-en_accuracy": 0.7610682787220373, "eval_wmt2019_ru-en_loss": 0.92138671875, "eval_wmt2019_ru-en_runtime": 21.983, "eval_wmt2019_ru-en_samples_per_second": 136.469, "eval_wmt2019_ru-en_steps_per_second": 1.729, "step": 500 }, { "epoch": 0.11, "eval_wmt2019_de-en_accuracy": 0.7658361423127319, "eval_wmt2019_de-en_loss": 0.92041015625, "eval_wmt2019_de-en_runtime": 17.0498, "eval_wmt2019_de-en_samples_per_second": 175.838, "eval_wmt2019_de-en_steps_per_second": 2.229, "step": 500 }, { "epoch": 0.11, "eval_ted_trans_de-ja_accuracy": 0.6635957565605807, "eval_ted_trans_de-ja_loss": 1.4384765625, "eval_ted_trans_de-ja_runtime": 7.9688, "eval_ted_trans_de-ja_samples_per_second": 90.101, "eval_ted_trans_de-ja_steps_per_second": 1.129, "step": 500 }, { "epoch": 0.11, "eval_ted_trans_en-ja_accuracy": 0.6737063575554276, "eval_ted_trans_en-ja_loss": 1.3544921875, "eval_ted_trans_en-ja_runtime": 9.6629, "eval_ted_trans_en-ja_samples_per_second": 82.894, "eval_ted_trans_en-ja_steps_per_second": 1.138, "step": 500 }, { "epoch": 0.11, "eval_ted_trans_en-hi_accuracy": 0.6986381322957198, "eval_ted_trans_en-hi_loss": 1.1357421875, "eval_ted_trans_en-hi_runtime": 2.3375, "eval_ted_trans_en-hi_samples_per_second": 44.064, "eval_ted_trans_en-hi_steps_per_second": 0.856, "step": 500 }, { "epoch": 0.11, "eval_ted_trans_en-es_accuracy": 0.7880831502109065, "eval_ted_trans_en-es_loss": 0.87353515625, "eval_ted_trans_en-es_runtime": 8.2834, "eval_ted_trans_en-es_samples_per_second": 99.718, "eval_ted_trans_en-es_steps_per_second": 1.328, "step": 500 }, { "epoch": 0.11, "eval_private_tuning_accuracy": 0.6889973407198902, "eval_private_tuning_loss": 1.130859375, "eval_private_tuning_runtime": 142.1785, "eval_private_tuning_samples_per_second": 148.954, "eval_private_tuning_steps_per_second": 1.864, "step": 500 }, { "epoch": 0.11, "eval_samsum_accuracy": 0.6474302924317498, "eval_samsum_loss": 1.27734375, "eval_samsum_runtime": 12.2877, "eval_samsum_samples_per_second": 66.571, "eval_samsum_steps_per_second": 0.895, "step": 500 }, { "epoch": 0.11, "eval_prosocial_dialogue_accuracy": 0.5408795463448586, "eval_prosocial_dialogue_loss": 1.7060546875, "eval_prosocial_dialogue_runtime": 48.1618, "eval_prosocial_dialogue_samples_per_second": 560.257, "eval_prosocial_dialogue_steps_per_second": 7.018, "step": 500 }, { "epoch": 0.11, "eval_oa_translated_accuracy": 0.719222779150248, "eval_oa_translated_loss": 1.1240234375, "eval_oa_translated_runtime": 59.7453, "eval_oa_translated_samples_per_second": 86.484, "eval_oa_translated_steps_per_second": 1.088, "step": 500 }, { "epoch": 0.11, "eval_wikihow_accuracy": 0.622957980862571, "eval_wikihow_loss": 1.7578125, "eval_wikihow_runtime": 15.5342, "eval_wikihow_samples_per_second": 147.61, "eval_wikihow_steps_per_second": 1.867, "step": 500 }, { "epoch": 0.11, "eval_explain_prosocial_accuracy": 0.6863205647867285, "eval_explain_prosocial_loss": 1.310546875, "eval_explain_prosocial_runtime": 111.3962, "eval_explain_prosocial_samples_per_second": 549.821, "eval_explain_prosocial_steps_per_second": 6.876, "step": 500 }, { "epoch": 0.12, "learning_rate": 4.5083465988888945e-06, "loss": 1.6702, "step": 510 }, { "epoch": 0.12, "learning_rate": 4.5224842384899045e-06, "loss": 1.6841, "step": 520 }, { "epoch": 0.12, "learning_rate": 4.5363510253542444e-06, "loss": 1.6574, "step": 530 }, { "epoch": 0.12, "learning_rate": 4.549957142832593e-06, "loss": 1.673, "step": 540 }, { "epoch": 0.13, "learning_rate": 4.563312210555719e-06, "loss": 1.6541, "step": 550 }, { "epoch": 0.13, "learning_rate": 4.576425325289549e-06, "loss": 1.6516, "step": 560 }, { "epoch": 0.13, "learning_rate": 4.589305098154845e-06, "loss": 1.6717, "step": 570 }, { "epoch": 0.13, "learning_rate": 4.601959688592886e-06, "loss": 1.6191, "step": 580 }, { "epoch": 0.14, "learning_rate": 4.614396835412691e-06, "loss": 1.6685, "step": 590 }, { "epoch": 0.14, "learning_rate": 4.626623885215616e-06, "loss": 1.6424, "step": 600 }, { "epoch": 0.14, "learning_rate": 4.638647818458763e-06, "loss": 1.6391, "step": 610 }, { "epoch": 0.14, "learning_rate": 4.650475273388737e-06, "loss": 1.6604, "step": 620 }, { "epoch": 0.14, "learning_rate": 4.662112568051194e-06, "loss": 1.6693, "step": 630 }, { "epoch": 0.15, "learning_rate": 4.673565720558918e-06, "loss": 1.6437, "step": 640 }, { "epoch": 0.15, "learning_rate": 4.6848404677811685e-06, "loss": 1.6688, "step": 650 }, { "epoch": 0.15, "learning_rate": 4.695942282599635e-06, "loss": 1.6521, "step": 660 }, { "epoch": 0.15, "learning_rate": 4.706876389860915e-06, "loss": 1.6568, "step": 670 }, { "epoch": 0.16, "learning_rate": 4.717647781141908e-06, "loss": 1.6462, "step": 680 }, { "epoch": 0.16, "learning_rate": 4.7282612284325845e-06, "loss": 1.6463, "step": 690 }, { "epoch": 0.16, "learning_rate": 4.738721296830016e-06, "loss": 1.6495, "step": 700 }, { "epoch": 0.16, "learning_rate": 4.749032356328167e-06, "loss": 1.6536, "step": 710 }, { "epoch": 0.17, "learning_rate": 4.759198592779668e-06, "loss": 1.6366, "step": 720 }, { "epoch": 0.17, "learning_rate": 4.769224018098397e-06, "loss": 1.6626, "step": 730 }, { "epoch": 0.17, "learning_rate": 4.7791124797650865e-06, "loss": 1.616, "step": 740 }, { "epoch": 0.17, "learning_rate": 4.788867669692332e-06, "loss": 1.6401, "step": 750 }, { "epoch": 0.17, "eval_webgpt_accuracy": 0.5064634455053698, "eval_webgpt_loss": 2.14453125, "eval_webgpt_runtime": 38.6915, "eval_webgpt_samples_per_second": 101.211, "eval_webgpt_steps_per_second": 1.266, "step": 750 }, { "epoch": 0.17, "eval_prompt_dialogue_accuracy": 0.6281575769303819, "eval_prompt_dialogue_loss": 1.3408203125, "eval_prompt_dialogue_runtime": 73.877, "eval_prompt_dialogue_samples_per_second": 139.543, "eval_prompt_dialogue_steps_per_second": 1.746, "step": 750 }, { "epoch": 0.17, "eval_adversarial_qa_accuracy": 0.8144184318097362, "eval_adversarial_qa_loss": 0.67626953125, "eval_adversarial_qa_runtime": 20.0332, "eval_adversarial_qa_samples_per_second": 149.751, "eval_adversarial_qa_steps_per_second": 1.897, "step": 750 }, { "epoch": 0.17, "eval_xsum_accuracy": 0.6340356358723188, "eval_xsum_loss": 1.3828125, "eval_xsum_runtime": 121.341, "eval_xsum_samples_per_second": 93.39, "eval_xsum_steps_per_second": 1.17, "step": 750 }, { "epoch": 0.17, "eval_cnn_dailymail_accuracy": 0.7028426612927849, "eval_cnn_dailymail_loss": NaN, "eval_cnn_dailymail_runtime": 144.2535, "eval_cnn_dailymail_samples_per_second": 92.67, "eval_cnn_dailymail_steps_per_second": 1.165, "step": 750 }, { "epoch": 0.17, "eval_multi_news_accuracy": 0.5819939666683152, "eval_multi_news_loss": NaN, "eval_multi_news_runtime": 61.526, "eval_multi_news_samples_per_second": 91.376, "eval_multi_news_steps_per_second": 1.154, "step": 750 }, { "epoch": 0.17, "eval_scitldr_accuracy": 0.491875, "eval_scitldr_loss": NaN, "eval_scitldr_runtime": 8.1765, "eval_scitldr_samples_per_second": 75.704, "eval_scitldr_steps_per_second": 0.978, "step": 750 }, { "epoch": 0.17, "eval_joke_accuracy": 0.5185746777862017, "eval_joke_loss": 2.07421875, "eval_joke_runtime": 0.7033, "eval_joke_samples_per_second": 108.069, "eval_joke_steps_per_second": 1.422, "step": 750 }, { "epoch": 0.17, "eval_gsm8k_accuracy": 0.7870444718962323, "eval_gsm8k_loss": 0.78466796875, "eval_gsm8k_runtime": 10.9372, "eval_gsm8k_samples_per_second": 120.597, "eval_gsm8k_steps_per_second": 1.554, "step": 750 }, { "epoch": 0.17, "eval_math_qa_accuracy": 0.619624940687815, "eval_math_qa_loss": 1.6083984375, "eval_math_qa_runtime": 31.4027, "eval_math_qa_samples_per_second": 142.504, "eval_math_qa_steps_per_second": 1.783, "step": 750 }, { "epoch": 0.17, "eval_essay_instruction_accuracy": 0.6070694115826017, "eval_essay_instruction_loss": 1.8740234375, "eval_essay_instruction_runtime": 8.0521, "eval_essay_instruction_samples_per_second": 51.291, "eval_essay_instruction_steps_per_second": 0.745, "step": 750 }, { "epoch": 0.17, "eval_tldr_news_accuracy": 0.615757678890496, "eval_tldr_news_loss": 1.66015625, "eval_tldr_news_runtime": 4.1264, "eval_tldr_news_samples_per_second": 192.418, "eval_tldr_news_steps_per_second": 2.423, "step": 750 }, { "epoch": 0.17, "eval_reddit_eli5_accuracy": 0.461742772350252, "eval_reddit_eli5_loss": 2.421875, "eval_reddit_eli5_runtime": 108.3649, "eval_reddit_eli5_samples_per_second": 90.546, "eval_reddit_eli5_steps_per_second": 1.135, "step": 750 }, { "epoch": 0.17, "eval_reddit_asks_accuracy": 0.4700219866226591, "eval_reddit_asks_loss": 2.41015625, "eval_reddit_asks_runtime": 31.416, "eval_reddit_asks_samples_per_second": 72.606, "eval_reddit_asks_steps_per_second": 0.923, "step": 750 }, { "epoch": 0.17, "eval_reddit_askh_accuracy": 0.46774579304106356, "eval_reddit_askh_loss": 2.5078125, "eval_reddit_askh_runtime": 61.093, "eval_reddit_askh_samples_per_second": 80.222, "eval_reddit_askh_steps_per_second": 1.015, "step": 750 }, { "epoch": 0.17, "eval_wmt2019_zh-en_accuracy": 0.6671902987021268, "eval_wmt2019_zh-en_loss": 1.4541015625, "eval_wmt2019_zh-en_runtime": 27.3556, "eval_wmt2019_zh-en_samples_per_second": 145.528, "eval_wmt2019_zh-en_steps_per_second": 1.828, "step": 750 }, { "epoch": 0.17, "eval_wmt2019_fr-de_accuracy": 0.7487164373574896, "eval_wmt2019_fr-de_loss": 0.9892578125, "eval_wmt2019_fr-de_runtime": 11.3417, "eval_wmt2019_fr-de_samples_per_second": 133.314, "eval_wmt2019_fr-de_steps_per_second": 1.675, "step": 750 }, { "epoch": 0.17, "eval_wmt2019_ru-en_accuracy": 0.7546621422248451, "eval_wmt2019_ru-en_loss": 0.94970703125, "eval_wmt2019_ru-en_runtime": 22.6465, "eval_wmt2019_ru-en_samples_per_second": 132.471, "eval_wmt2019_ru-en_steps_per_second": 1.678, "step": 750 }, { "epoch": 0.17, "eval_wmt2019_de-en_accuracy": 0.7651105551969012, "eval_wmt2019_de-en_loss": 0.92236328125, "eval_wmt2019_de-en_runtime": 16.3647, "eval_wmt2019_de-en_samples_per_second": 183.199, "eval_wmt2019_de-en_steps_per_second": 2.322, "step": 750 }, { "epoch": 0.17, "eval_ted_trans_de-ja_accuracy": 0.6670448957978744, "eval_ted_trans_de-ja_loss": 1.4306640625, "eval_ted_trans_de-ja_runtime": 8.4143, "eval_ted_trans_de-ja_samples_per_second": 85.331, "eval_ted_trans_de-ja_steps_per_second": 1.07, "step": 750 }, { "epoch": 0.17, "eval_ted_trans_en-ja_accuracy": 0.6718075628588398, "eval_ted_trans_en-ja_loss": 1.33203125, "eval_ted_trans_en-ja_runtime": 10.3712, "eval_ted_trans_en-ja_samples_per_second": 77.233, "eval_ted_trans_en-ja_steps_per_second": 1.061, "step": 750 }, { "epoch": 0.17, "eval_ted_trans_en-hi_accuracy": 0.6781445982723938, "eval_ted_trans_en-hi_loss": 1.2021484375, "eval_ted_trans_en-hi_runtime": 1.747, "eval_ted_trans_en-hi_samples_per_second": 58.959, "eval_ted_trans_en-hi_steps_per_second": 1.145, "step": 750 }, { "epoch": 0.17, "eval_ted_trans_en-es_accuracy": 0.787559638615369, "eval_ted_trans_en-es_loss": 0.88525390625, "eval_ted_trans_en-es_runtime": 9.0268, "eval_ted_trans_en-es_samples_per_second": 91.505, "eval_ted_trans_en-es_steps_per_second": 1.219, "step": 750 }, { "epoch": 0.17, "eval_private_tuning_accuracy": 0.693209861771278, "eval_private_tuning_loss": 1.1103515625, "eval_private_tuning_runtime": 144.1209, "eval_private_tuning_samples_per_second": 146.946, "eval_private_tuning_steps_per_second": 1.839, "step": 750 }, { "epoch": 0.17, "eval_samsum_accuracy": 0.6467502185951618, "eval_samsum_loss": 1.259765625, "eval_samsum_runtime": 9.1622, "eval_samsum_samples_per_second": 89.28, "eval_samsum_steps_per_second": 1.201, "step": 750 }, { "epoch": 0.17, "eval_prosocial_dialogue_accuracy": 0.5496461048716204, "eval_prosocial_dialogue_loss": 1.6904296875, "eval_prosocial_dialogue_runtime": 49.4898, "eval_prosocial_dialogue_samples_per_second": 545.224, "eval_prosocial_dialogue_steps_per_second": 6.83, "step": 750 }, { "epoch": 0.17, "eval_oa_translated_accuracy": 0.7254537658996713, "eval_oa_translated_loss": 1.0947265625, "eval_oa_translated_runtime": 57.4991, "eval_oa_translated_samples_per_second": 89.862, "eval_oa_translated_steps_per_second": 1.13, "step": 750 }, { "epoch": 0.17, "eval_wikihow_accuracy": 0.6223200665649702, "eval_wikihow_loss": 1.744140625, "eval_wikihow_runtime": 16.9927, "eval_wikihow_samples_per_second": 134.94, "eval_wikihow_steps_per_second": 1.707, "step": 750 }, { "epoch": 0.17, "eval_explain_prosocial_accuracy": 0.6895944881927522, "eval_explain_prosocial_loss": 1.2900390625, "eval_explain_prosocial_runtime": 109.8962, "eval_explain_prosocial_samples_per_second": 557.326, "eval_explain_prosocial_steps_per_second": 6.97, "step": 750 }, { "epoch": 0.17, "learning_rate": 4.798493132500121e-06, "loss": 1.6331, "step": 760 }, { "epoch": 0.18, "learning_rate": 4.8079922732483016e-06, "loss": 1.6242, "step": 770 }, { "epoch": 0.18, "learning_rate": 4.817368364668191e-06, "loss": 1.6471, "step": 780 }, { "epoch": 0.18, "learning_rate": 4.8266245539317745e-06, "loss": 1.6592, "step": 790 }, { "epoch": 0.18, "learning_rate": 4.835763868993521e-06, "loss": 1.6586, "step": 800 }, { "epoch": 0.19, "learning_rate": 4.844789224536785e-06, "loss": 1.6354, "step": 810 }, { "epoch": 0.19, "learning_rate": 4.853703427554027e-06, "loss": 1.6602, "step": 820 }, { "epoch": 0.19, "learning_rate": 4.862509182587578e-06, "loss": 1.652, "step": 830 }, { "epoch": 0.19, "learning_rate": 4.871209096655434e-06, "loss": 1.6451, "step": 840 }, { "epoch": 0.19, "learning_rate": 4.879805683884512e-06, "loss": 1.6404, "step": 850 }, { "epoch": 0.2, "learning_rate": 4.888301369871998e-06, "loss": 1.6411, "step": 860 }, { "epoch": 0.2, "learning_rate": 4.8966984957936845e-06, "loss": 1.6314, "step": 870 }, { "epoch": 0.2, "learning_rate": 4.904999322276735e-06, "loss": 1.6189, "step": 880 }, { "epoch": 0.2, "learning_rate": 4.913206033052878e-06, "loss": 1.6514, "step": 890 }, { "epoch": 0.21, "learning_rate": 4.921320738406821e-06, "loss": 1.6363, "step": 900 }, { "epoch": 0.21, "learning_rate": 4.929345478433492e-06, "loss": 1.6398, "step": 910 }, { "epoch": 0.21, "learning_rate": 4.937282226116702e-06, "loss": 1.6277, "step": 920 }, { "epoch": 0.21, "learning_rate": 4.945132890240829e-06, "loss": 1.6236, "step": 930 }, { "epoch": 0.22, "learning_rate": 4.952899318146298e-06, "loss": 1.6353, "step": 940 }, { "epoch": 0.22, "learning_rate": 4.96058329833879e-06, "loss": 1.6394, "step": 950 }, { "epoch": 0.22, "learning_rate": 4.968186562961406e-06, "loss": 1.6293, "step": 960 }, { "epoch": 0.22, "learning_rate": 4.975710790138337e-06, "loss": 1.6259, "step": 970 }, { "epoch": 0.22, "learning_rate": 4.9831576061979556e-06, "loss": 1.6124, "step": 980 }, { "epoch": 0.23, "learning_rate": 4.990528587782728e-06, "loss": 1.6569, "step": 990 }, { "epoch": 0.23, "learning_rate": 4.99782526385276e-06, "loss": 1.638, "step": 1000 }, { "epoch": 0.23, "eval_webgpt_accuracy": 0.5073254588442403, "eval_webgpt_loss": 2.140625, "eval_webgpt_runtime": 38.7594, "eval_webgpt_samples_per_second": 101.034, "eval_webgpt_steps_per_second": 1.264, "step": 1000 }, { "epoch": 0.23, "eval_prompt_dialogue_accuracy": 0.6310662447605156, "eval_prompt_dialogue_loss": 1.3232421875, "eval_prompt_dialogue_runtime": 75.506, "eval_prompt_dialogue_samples_per_second": 136.532, "eval_prompt_dialogue_steps_per_second": 1.708, "step": 1000 }, { "epoch": 0.23, "eval_adversarial_qa_accuracy": 0.8161278335191379, "eval_adversarial_qa_loss": 0.65185546875, "eval_adversarial_qa_runtime": 17.8135, "eval_adversarial_qa_samples_per_second": 168.411, "eval_adversarial_qa_steps_per_second": 2.133, "step": 1000 }, { "epoch": 0.23, "eval_xsum_accuracy": 0.6358828745144401, "eval_xsum_loss": 1.3759765625, "eval_xsum_runtime": 120.9036, "eval_xsum_samples_per_second": 93.728, "eval_xsum_steps_per_second": 1.174, "step": 1000 }, { "epoch": 0.23, "eval_cnn_dailymail_accuracy": 0.7034533296693231, "eval_cnn_dailymail_loss": NaN, "eval_cnn_dailymail_runtime": 143.0011, "eval_cnn_dailymail_samples_per_second": 93.482, "eval_cnn_dailymail_steps_per_second": 1.175, "step": 1000 }, { "epoch": 0.23, "eval_multi_news_accuracy": 0.58382374758914, "eval_multi_news_loss": NaN, "eval_multi_news_runtime": 62.8321, "eval_multi_news_samples_per_second": 89.477, "eval_multi_news_steps_per_second": 1.13, "step": 1000 }, { "epoch": 0.23, "eval_scitldr_accuracy": 0.495625, "eval_scitldr_loss": NaN, "eval_scitldr_runtime": 7.4344, "eval_scitldr_samples_per_second": 83.262, "eval_scitldr_steps_per_second": 1.076, "step": 1000 }, { "epoch": 0.23, "eval_joke_accuracy": 0.5290940106141016, "eval_joke_loss": 2.01953125, "eval_joke_runtime": 0.7811, "eval_joke_samples_per_second": 97.299, "eval_joke_steps_per_second": 1.28, "step": 1000 }, { "epoch": 0.23, "eval_gsm8k_accuracy": 0.7911056207535516, "eval_gsm8k_loss": 0.76318359375, "eval_gsm8k_runtime": 9.9142, "eval_gsm8k_samples_per_second": 133.041, "eval_gsm8k_steps_per_second": 1.715, "step": 1000 }, { "epoch": 0.23, "eval_math_qa_accuracy": 0.6306243424083842, "eval_math_qa_loss": 1.548828125, "eval_math_qa_runtime": 31.7722, "eval_math_qa_samples_per_second": 140.847, "eval_math_qa_steps_per_second": 1.763, "step": 1000 }, { "epoch": 0.23, "eval_essay_instruction_accuracy": 0.6082469097583089, "eval_essay_instruction_loss": 1.8671875, "eval_essay_instruction_runtime": 8.3983, "eval_essay_instruction_samples_per_second": 49.176, "eval_essay_instruction_steps_per_second": 0.714, "step": 1000 }, { "epoch": 0.23, "eval_tldr_news_accuracy": 0.6160980175274398, "eval_tldr_news_loss": 1.6416015625, "eval_tldr_news_runtime": 3.6256, "eval_tldr_news_samples_per_second": 218.998, "eval_tldr_news_steps_per_second": 2.758, "step": 1000 }, { "epoch": 0.23, "eval_reddit_eli5_accuracy": 0.4611581507518863, "eval_reddit_eli5_loss": 2.419921875, "eval_reddit_eli5_runtime": 107.6335, "eval_reddit_eli5_samples_per_second": 91.161, "eval_reddit_eli5_steps_per_second": 1.143, "step": 1000 }, { "epoch": 0.23, "eval_reddit_asks_accuracy": 0.4688479936563187, "eval_reddit_asks_loss": 2.408203125, "eval_reddit_asks_runtime": 31.0014, "eval_reddit_asks_samples_per_second": 73.577, "eval_reddit_asks_steps_per_second": 0.935, "step": 1000 }, { "epoch": 0.23, "eval_reddit_askh_accuracy": 0.4678089398518734, "eval_reddit_askh_loss": 2.505859375, "eval_reddit_askh_runtime": 58.2699, "eval_reddit_askh_samples_per_second": 84.109, "eval_reddit_askh_steps_per_second": 1.064, "step": 1000 }, { "epoch": 0.23, "eval_wmt2019_zh-en_accuracy": 0.6663175800777634, "eval_wmt2019_zh-en_loss": 1.4599609375, "eval_wmt2019_zh-en_runtime": 29.9768, "eval_wmt2019_zh-en_samples_per_second": 132.803, "eval_wmt2019_zh-en_steps_per_second": 1.668, "step": 1000 }, { "epoch": 0.23, "eval_wmt2019_fr-de_accuracy": 0.7535103098381767, "eval_wmt2019_fr-de_loss": 0.9736328125, "eval_wmt2019_fr-de_runtime": 10.3533, "eval_wmt2019_fr-de_samples_per_second": 146.04, "eval_wmt2019_fr-de_steps_per_second": 1.835, "step": 1000 }, { "epoch": 0.23, "eval_wmt2019_ru-en_accuracy": 0.7577544408610879, "eval_wmt2019_ru-en_loss": 0.93603515625, "eval_wmt2019_ru-en_runtime": 22.7659, "eval_wmt2019_ru-en_samples_per_second": 131.776, "eval_wmt2019_ru-en_steps_per_second": 1.669, "step": 1000 }, { "epoch": 0.23, "eval_wmt2019_de-en_accuracy": 0.7657056972240155, "eval_wmt2019_de-en_loss": 0.9150390625, "eval_wmt2019_de-en_runtime": 15.5528, "eval_wmt2019_de-en_samples_per_second": 192.762, "eval_wmt2019_de-en_steps_per_second": 2.443, "step": 1000 }, { "epoch": 0.23, "eval_ted_trans_de-ja_accuracy": 0.6747348791651755, "eval_ted_trans_de-ja_loss": 1.3994140625, "eval_ted_trans_de-ja_runtime": 8.3691, "eval_ted_trans_de-ja_samples_per_second": 85.792, "eval_ted_trans_de-ja_steps_per_second": 1.075, "step": 1000 }, { "epoch": 0.23, "eval_ted_trans_en-ja_accuracy": 0.6812527237431164, "eval_ted_trans_en-ja_loss": 1.3115234375, "eval_ted_trans_en-ja_runtime": 9.3233, "eval_ted_trans_en-ja_samples_per_second": 85.914, "eval_ted_trans_en-ja_steps_per_second": 1.18, "step": 1000 }, { "epoch": 0.23, "eval_ted_trans_en-hi_accuracy": 0.6843067779174763, "eval_ted_trans_en-hi_loss": 1.19921875, "eval_ted_trans_en-hi_runtime": 2.8696, "eval_ted_trans_en-hi_samples_per_second": 35.893, "eval_ted_trans_en-hi_steps_per_second": 0.697, "step": 1000 }, { "epoch": 0.23, "eval_ted_trans_en-es_accuracy": 0.7943555499559027, "eval_ted_trans_en-es_loss": 0.85009765625, "eval_ted_trans_en-es_runtime": 8.4213, "eval_ted_trans_en-es_samples_per_second": 98.085, "eval_ted_trans_en-es_steps_per_second": 1.306, "step": 1000 }, { "epoch": 0.23, "eval_private_tuning_accuracy": 0.6955408449271764, "eval_private_tuning_loss": 1.0966796875, "eval_private_tuning_runtime": 144.1155, "eval_private_tuning_samples_per_second": 146.952, "eval_private_tuning_steps_per_second": 1.839, "step": 1000 }, { "epoch": 0.23, "eval_samsum_accuracy": 0.651219275235597, "eval_samsum_loss": 1.2451171875, "eval_samsum_runtime": 10.3624, "eval_samsum_samples_per_second": 78.939, "eval_samsum_steps_per_second": 1.062, "step": 1000 }, { "epoch": 0.23, "eval_prosocial_dialogue_accuracy": 0.5541225644750905, "eval_prosocial_dialogue_loss": 1.671875, "eval_prosocial_dialogue_runtime": 49.1807, "eval_prosocial_dialogue_samples_per_second": 548.65, "eval_prosocial_dialogue_steps_per_second": 6.873, "step": 1000 }, { "epoch": 0.23, "eval_oa_translated_accuracy": 0.7297566303926173, "eval_oa_translated_loss": 1.0712890625, "eval_oa_translated_runtime": 57.6497, "eval_oa_translated_samples_per_second": 89.628, "eval_oa_translated_steps_per_second": 1.127, "step": 1000 }, { "epoch": 0.23, "eval_wikihow_accuracy": 0.6223200665649702, "eval_wikihow_loss": 1.73828125, "eval_wikihow_runtime": 16.8591, "eval_wikihow_samples_per_second": 136.009, "eval_wikihow_steps_per_second": 1.72, "step": 1000 }, { "epoch": 0.23, "eval_explain_prosocial_accuracy": 0.6887323092680878, "eval_explain_prosocial_loss": 1.2744140625, "eval_explain_prosocial_runtime": 111.2104, "eval_explain_prosocial_samples_per_second": 550.74, "eval_explain_prosocial_steps_per_second": 6.888, "step": 1000 } ], "max_steps": 17452, "num_train_epochs": 4, "total_flos": 1.6761689779124306e+19, "trial_name": null, "trial_params": null }