{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.029488434635935788, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.4084967333570947e-06, "loss": 2.0409, "step": 10 }, { "epoch": 0.0, "learning_rate": 2.0507482022971233e-06, "loss": 1.8182, "step": 20 }, { "epoch": 0.0, "learning_rate": 2.385606273598312e-06, "loss": 1.6715, "step": 30 }, { "epoch": 0.0, "learning_rate": 2.6136695401116585e-06, "loss": 1.7864, "step": 40 }, { "epoch": 0.0, "learning_rate": 2.7868297632261957e-06, "loss": 1.6323, "step": 50 }, { "epoch": 0.0, "learning_rate": 2.926458092787486e-06, "loss": 1.6948, "step": 60 }, { "epoch": 0.0, "learning_rate": 3.0434580045013773e-06, "loss": 1.6492, "step": 70 }, { "epoch": 0.0, "learning_rate": 3.1441512086208035e-06, "loss": 1.6454, "step": 80 }, { "epoch": 0.0, "learning_rate": 3.232532087697698e-06, "loss": 1.6685, "step": 90 }, { "epoch": 0.0, "learning_rate": 3.3112862237770753e-06, "loss": 1.5988, "step": 100 }, { "epoch": 0.0, "learning_rate": 3.3823062961420163e-06, "loss": 1.5397, "step": 110 }, { "epoch": 0.0, "learning_rate": 3.446976436243603e-06, "loss": 1.6389, "step": 120 }, { "epoch": 0.0, "learning_rate": 3.506339534926595e-06, "loss": 1.5864, "step": 130 }, { "epoch": 0.0, "learning_rate": 3.5612009452606784e-06, "loss": 1.6896, "step": 140 }, { "epoch": 0.0, "learning_rate": 3.612195557913627e-06, "loss": 1.6217, "step": 150 }, { "epoch": 0.0, "learning_rate": 3.65983275401539e-06, "loss": 1.6265, "step": 160 }, { "epoch": 0.01, "learning_rate": 3.7045274519126395e-06, "loss": 1.6128, "step": 170 }, { "epoch": 0.01, "learning_rate": 3.7466221106030114e-06, "loss": 1.5282, "step": 180 }, { "epoch": 0.01, "learning_rate": 3.786402677560832e-06, "loss": 1.6623, "step": 190 }, { "epoch": 0.01, "learning_rate": 3.824110376935989e-06, "loss": 1.5958, "step": 200 }, { "epoch": 0.01, "learning_rate": 3.8599505757615295e-06, "loss": 1.6162, "step": 210 }, { "epoch": 0.01, "learning_rate": 3.894099556414216e-06, "loss": 1.5936, "step": 220 }, { "epoch": 0.01, "learning_rate": 3.9267097619885385e-06, "loss": 1.5658, "step": 230 }, { "epoch": 0.01, "learning_rate": 3.95791391001684e-06, "loss": 1.627, "step": 240 }, { "epoch": 0.01, "learning_rate": 3.987828255432777e-06, "loss": 1.6155, "step": 250 }, { "epoch": 0.01, "eval_gsm8k_hard_accuracy": 0.8835755873132202, "eval_gsm8k_hard_loss": 0.51513671875, "eval_gsm8k_hard_runtime": 6.8667, "eval_gsm8k_hard_samples_per_second": 38.446, "eval_gsm8k_hard_steps_per_second": 0.437, "step": 250 }, { "epoch": 0.01, "eval_webgpt_accuracy": 0.49985688262224126, "eval_webgpt_loss": 2.197265625, "eval_webgpt_runtime": 38.8229, "eval_webgpt_samples_per_second": 100.868, "eval_webgpt_steps_per_second": 1.056, "step": 250 }, { "epoch": 0.01, "eval_squad_v2_accuracy": 0.8732225651432517, "eval_squad_v2_loss": 0.394775390625, "eval_squad_v2_runtime": 212.1787, "eval_squad_v2_samples_per_second": 122.84, "eval_squad_v2_steps_per_second": 1.282, "step": 250 }, { "epoch": 0.01, "eval_adversarial_qa_accuracy": 0.7885436255634161, "eval_adversarial_qa_loss": 0.84423828125, "eval_adversarial_qa_runtime": 53.4145, "eval_adversarial_qa_samples_per_second": 112.329, "eval_adversarial_qa_steps_per_second": 1.179, "step": 250 }, { "epoch": 0.01, "eval_private_tuning_accuracy": 0.6697048468296535, "eval_private_tuning_loss": 1.234375, "eval_private_tuning_runtime": 147.3821, "eval_private_tuning_samples_per_second": 143.695, "eval_private_tuning_steps_per_second": 1.5, "step": 250 }, { "epoch": 0.01, "eval_oa_translated_accuracy": 0.6888436472902636, "eval_oa_translated_loss": 1.271484375, "eval_oa_translated_runtime": 1288.641, "eval_oa_translated_samples_per_second": 91.051, "eval_oa_translated_steps_per_second": 0.949, "step": 250 }, { "epoch": 0.01, "eval_prosocial_dialogue_accuracy": 0.5277240036359349, "eval_prosocial_dialogue_loss": 1.830078125, "eval_prosocial_dialogue_runtime": 61.2751, "eval_prosocial_dialogue_samples_per_second": 440.358, "eval_prosocial_dialogue_steps_per_second": 4.602, "step": 250 }, { "epoch": 0.01, "eval_math_qa_accuracy": 0.5650762200656649, "eval_math_qa_loss": 1.912109375, "eval_math_qa_runtime": 43.4013, "eval_math_qa_samples_per_second": 137.507, "eval_math_qa_steps_per_second": 1.452, "step": 250 }, { "epoch": 0.01, "eval_wikihow_accuracy": 0.6096311191235613, "eval_wikihow_loss": 1.8701171875, "eval_wikihow_runtime": 16.5775, "eval_wikihow_samples_per_second": 138.32, "eval_wikihow_steps_per_second": 1.448, "step": 250 }, { "epoch": 0.01, "eval_joke_accuracy": 0.49194465504169826, "eval_joke_loss": 2.216796875, "eval_joke_runtime": 2.4324, "eval_joke_samples_per_second": 31.245, "eval_joke_steps_per_second": 0.411, "step": 250 }, { "epoch": 0.01, "eval_gsm8k_accuracy": 0.7494717398301172, "eval_gsm8k_loss": 0.9765625, "eval_gsm8k_runtime": 10.7538, "eval_gsm8k_samples_per_second": 139.021, "eval_gsm8k_steps_per_second": 1.488, "step": 250 }, { "epoch": 0.01, "eval_ted_trans_en-hi_accuracy": 0.6902974158946855, "eval_ted_trans_en-hi_loss": 1.1455078125, "eval_ted_trans_en-hi_runtime": 4.0656, "eval_ted_trans_en-hi_samples_per_second": 25.335, "eval_ted_trans_en-hi_steps_per_second": 0.492, "step": 250 }, { "epoch": 0.01, "eval_ted_trans_de-ja_accuracy": 0.6504194889162561, "eval_ted_trans_de-ja_loss": 1.52734375, "eval_ted_trans_de-ja_runtime": 8.8337, "eval_ted_trans_de-ja_samples_per_second": 81.279, "eval_ted_trans_de-ja_steps_per_second": 0.906, "step": 250 }, { "epoch": 0.01, "eval_ted_trans_nl-en_accuracy": 0.7506186489759386, "eval_ted_trans_nl-en_loss": 1.091796875, "eval_ted_trans_nl-en_runtime": 8.871, "eval_ted_trans_nl-en_samples_per_second": 86.913, "eval_ted_trans_nl-en_steps_per_second": 1.015, "step": 250 }, { "epoch": 0.01, "eval_ted_trans_en-ja_accuracy": 0.6557647009776333, "eval_ted_trans_en-ja_loss": 1.427734375, "eval_ted_trans_en-ja_runtime": 9.6821, "eval_ted_trans_en-ja_samples_per_second": 82.73, "eval_ted_trans_en-ja_steps_per_second": 0.93, "step": 250 }, { "epoch": 0.01, "eval_ted_trans_en-es_accuracy": 0.7831022379328372, "eval_ted_trans_en-es_loss": 0.89599609375, "eval_ted_trans_en-es_runtime": 7.8367, "eval_ted_trans_en-es_samples_per_second": 105.401, "eval_ted_trans_en-es_steps_per_second": 1.148, "step": 250 }, { "epoch": 0.01, "eval_ted_trans_en-ms_accuracy": 0.689470871191876, "eval_ted_trans_en-ms_loss": 1.4052734375, "eval_ted_trans_en-ms_runtime": 1.3714, "eval_ted_trans_en-ms_samples_per_second": 30.625, "eval_ted_trans_en-ms_steps_per_second": 0.729, "step": 250 }, { "epoch": 0.01, "eval_xsum_accuracy": 0.6193833980292625, "eval_xsum_loss": 1.4599609375, "eval_xsum_runtime": 434.5368, "eval_xsum_samples_per_second": 93.914, "eval_xsum_steps_per_second": 0.98, "step": 250 }, { "epoch": 0.01, "eval_cnn_dailymail_accuracy": 0.6712703040399833, "eval_cnn_dailymail_loss": NaN, "eval_cnn_dailymail_runtime": 624.2796, "eval_cnn_dailymail_samples_per_second": 91.983, "eval_cnn_dailymail_steps_per_second": 0.96, "step": 250 }, { "epoch": 0.01, "eval_multi_news_accuracy": 0.5461545789406833, "eval_multi_news_loss": NaN, "eval_multi_news_runtime": 102.6315, "eval_multi_news_samples_per_second": 87.644, "eval_multi_news_steps_per_second": 0.916, "step": 250 }, { "epoch": 0.01, "eval_tldr_news_accuracy": 0.5329163923633969, "eval_tldr_news_loss": 2.1640625, "eval_tldr_news_runtime": 7.304, "eval_tldr_news_samples_per_second": 195.509, "eval_tldr_news_steps_per_second": 2.054, "step": 250 }, { "epoch": 0.01, "eval_scitldr_accuracy": 0.5056726094003241, "eval_scitldr_loss": NaN, "eval_scitldr_runtime": 6.0172, "eval_scitldr_samples_per_second": 66.309, "eval_scitldr_steps_per_second": 0.831, "step": 250 }, { "epoch": 0.01, "eval_samsum_accuracy": 0.6255323175925049, "eval_samsum_loss": 1.390625, "eval_samsum_runtime": 31.2731, "eval_samsum_samples_per_second": 94.234, "eval_samsum_steps_per_second": 0.991, "step": 250 }, { "epoch": 0.01, "eval_debate_sum_accuracy": 0.934249098160658, "eval_debate_sum_loss": 0.363525390625, "eval_debate_sum_runtime": 539.9242, "eval_debate_sum_samples_per_second": 89.113, "eval_debate_sum_steps_per_second": 0.93, "step": 250 }, { "epoch": 0.01, "eval_billsum_accuracy": 0.6761362636279469, "eval_billsum_loss": 1.3681640625, "eval_billsum_runtime": 47.9835, "eval_billsum_samples_per_second": 78.985, "eval_billsum_steps_per_second": 0.834, "step": 250 }, { "epoch": 0.01, "eval_wmt2019_zh-en_accuracy": 0.6633805205208717, "eval_wmt2019_zh-en_loss": 1.474609375, "eval_wmt2019_zh-en_runtime": 27.1758, "eval_wmt2019_zh-en_samples_per_second": 146.491, "eval_wmt2019_zh-en_steps_per_second": 1.545, "step": 250 }, { "epoch": 0.01, "eval_wmt2019_ru-en_accuracy": 0.7568385011868931, "eval_wmt2019_ru-en_loss": 0.9365234375, "eval_wmt2019_ru-en_runtime": 21.7646, "eval_wmt2019_ru-en_samples_per_second": 137.839, "eval_wmt2019_ru-en_steps_per_second": 1.47, "step": 250 }, { "epoch": 0.01, "eval_wmt2019_de-en_accuracy": 0.7579152898768399, "eval_wmt2019_de-en_loss": 0.94921875, "eval_wmt2019_de-en_runtime": 15.095, "eval_wmt2019_de-en_samples_per_second": 198.609, "eval_wmt2019_de-en_steps_per_second": 2.12, "step": 250 }, { "epoch": 0.01, "eval_wmt2019_fr-de_accuracy": 0.7458755561047948, "eval_wmt2019_fr-de_loss": 1.0107421875, "eval_wmt2019_fr-de_runtime": 10.8089, "eval_wmt2019_fr-de_samples_per_second": 139.885, "eval_wmt2019_fr-de_steps_per_second": 1.48, "step": 250 }, { "epoch": 0.01, "eval_essay_instruction_accuracy": 0.5980087566061517, "eval_essay_instruction_loss": 1.939453125, "eval_essay_instruction_runtime": 8.5102, "eval_essay_instruction_samples_per_second": 48.53, "eval_essay_instruction_steps_per_second": 0.588, "step": 250 }, { "epoch": 0.01, "eval_reddit_eli5_accuracy": 0.4587571011238715, "eval_reddit_eli5_loss": 2.43359375, "eval_reddit_eli5_runtime": 592.806, "eval_reddit_eli5_samples_per_second": 91.981, "eval_reddit_eli5_steps_per_second": 0.958, "step": 250 }, { "epoch": 0.01, "eval_reddit_askh_accuracy": 0.46236593037589085, "eval_reddit_askh_loss": 2.53125, "eval_reddit_askh_runtime": 245.8916, "eval_reddit_askh_samples_per_second": 80.137, "eval_reddit_askh_steps_per_second": 0.838, "step": 250 }, { "epoch": 0.01, "eval_reddit_asks_accuracy": 0.4693832359074744, "eval_reddit_asks_loss": 2.390625, "eval_reddit_asks_runtime": 307.0019, "eval_reddit_asks_samples_per_second": 85.85, "eval_reddit_asks_steps_per_second": 0.896, "step": 250 }, { "epoch": 0.01, "learning_rate": 4.016555205552159e-06, "loss": 1.6024, "step": 260 }, { "epoch": 0.01, "learning_rate": 4.044185435607626e-06, "loss": 1.6344, "step": 270 }, { "epoch": 0.01, "learning_rate": 4.070799615107415e-06, "loss": 1.5251, "step": 280 }, { "epoch": 0.01, "learning_rate": 4.096469827889988e-06, "loss": 1.5818, "step": 290 }, { "epoch": 0.01, "learning_rate": 4.121260748862021e-06, "loss": 1.6331, "step": 300 }, { "epoch": 0.01, "learning_rate": 4.145230625795312e-06, "loss": 1.6272, "step": 310 }, { "epoch": 0.01, "learning_rate": 4.1684321036962525e-06, "loss": 1.5872, "step": 320 }, { "epoch": 0.01, "learning_rate": 4.190912921100477e-06, "loss": 1.6504, "step": 330 }, { "epoch": 0.01, "learning_rate": 4.212716501452232e-06, "loss": 1.566, "step": 340 }, { "epoch": 0.01, "learning_rate": 4.233882457984791e-06, "loss": 1.6106, "step": 350 }, { "epoch": 0.01, "learning_rate": 4.2544470268536555e-06, "loss": 1.616, "step": 360 }, { "epoch": 0.01, "learning_rate": 4.27444344042015e-06, "loss": 1.6374, "step": 370 }, { "epoch": 0.01, "learning_rate": 4.293902250342989e-06, "loss": 1.5941, "step": 380 }, { "epoch": 0.01, "learning_rate": 4.312851608364853e-06, "loss": 1.6115, "step": 390 }, { "epoch": 0.01, "learning_rate": 4.3313175112718595e-06, "loss": 1.5531, "step": 400 }, { "epoch": 0.01, "learning_rate": 4.3493240153753665e-06, "loss": 1.5554, "step": 410 }, { "epoch": 0.01, "learning_rate": 4.366893424956263e-06, "loss": 1.5233, "step": 420 }, { "epoch": 0.01, "learning_rate": 4.38404645837504e-06, "loss": 1.5579, "step": 430 }, { "epoch": 0.01, "learning_rate": 4.400802394950703e-06, "loss": 1.6028, "step": 440 }, { "epoch": 0.01, "learning_rate": 4.4171792052198945e-06, "loss": 1.5239, "step": 450 }, { "epoch": 0.01, "learning_rate": 4.433193666783084e-06, "loss": 1.6149, "step": 460 }, { "epoch": 0.01, "learning_rate": 4.448861467610187e-06, "loss": 1.6114, "step": 470 }, { "epoch": 0.01, "learning_rate": 4.4641972984001906e-06, "loss": 1.6682, "step": 480 }, { "epoch": 0.01, "learning_rate": 4.479214935357724e-06, "loss": 1.5707, "step": 490 }, { "epoch": 0.01, "learning_rate": 4.493927314555554e-06, "loss": 1.5827, "step": 500 }, { "epoch": 0.01, "eval_gsm8k_hard_accuracy": 0.9183054435894304, "eval_gsm8k_hard_loss": 0.3740234375, "eval_gsm8k_hard_runtime": 3.8504, "eval_gsm8k_hard_samples_per_second": 68.565, "eval_gsm8k_hard_steps_per_second": 0.779, "step": 500 }, { "epoch": 0.01, "eval_webgpt_accuracy": 0.502614994399395, "eval_webgpt_loss": 2.185546875, "eval_webgpt_runtime": 37.4417, "eval_webgpt_samples_per_second": 104.589, "eval_webgpt_steps_per_second": 1.095, "step": 500 }, { "epoch": 0.01, "eval_squad_v2_accuracy": 0.8982722417170479, "eval_squad_v2_loss": 0.33447265625, "eval_squad_v2_runtime": 214.9352, "eval_squad_v2_samples_per_second": 121.264, "eval_squad_v2_steps_per_second": 1.265, "step": 500 }, { "epoch": 0.01, "eval_adversarial_qa_accuracy": 0.8046625473866452, "eval_adversarial_qa_loss": 0.8486328125, "eval_adversarial_qa_runtime": 51.9881, "eval_adversarial_qa_samples_per_second": 115.411, "eval_adversarial_qa_steps_per_second": 1.212, "step": 500 }, { "epoch": 0.01, "eval_private_tuning_accuracy": 0.6754279825666092, "eval_private_tuning_loss": 1.20703125, "eval_private_tuning_runtime": 143.688, "eval_private_tuning_samples_per_second": 147.389, "eval_private_tuning_steps_per_second": 1.538, "step": 500 }, { "epoch": 0.01, "eval_oa_translated_accuracy": 0.6956755454438557, "eval_oa_translated_loss": 1.2421875, "eval_oa_translated_runtime": 1298.0566, "eval_oa_translated_samples_per_second": 90.391, "eval_oa_translated_steps_per_second": 0.942, "step": 500 }, { "epoch": 0.01, "eval_prosocial_dialogue_accuracy": 0.5306487253309804, "eval_prosocial_dialogue_loss": 1.783203125, "eval_prosocial_dialogue_runtime": 62.6995, "eval_prosocial_dialogue_samples_per_second": 430.355, "eval_prosocial_dialogue_steps_per_second": 4.498, "step": 500 }, { "epoch": 0.01, "eval_math_qa_accuracy": 0.573035368807606, "eval_math_qa_loss": 1.849609375, "eval_math_qa_runtime": 42.0578, "eval_math_qa_samples_per_second": 141.9, "eval_math_qa_steps_per_second": 1.498, "step": 500 }, { "epoch": 0.01, "eval_wikihow_accuracy": 0.6166412425461101, "eval_wikihow_loss": 1.8369140625, "eval_wikihow_runtime": 17.5874, "eval_wikihow_samples_per_second": 130.377, "eval_wikihow_steps_per_second": 1.365, "step": 500 }, { "epoch": 0.01, "eval_joke_accuracy": 0.500284306292646, "eval_joke_loss": 2.1875, "eval_joke_runtime": 1.5291, "eval_joke_samples_per_second": 49.704, "eval_joke_steps_per_second": 0.654, "step": 500 }, { "epoch": 0.01, "eval_gsm8k_accuracy": 0.7605687018093785, "eval_gsm8k_loss": 0.91357421875, "eval_gsm8k_runtime": 11.3759, "eval_gsm8k_samples_per_second": 131.418, "eval_gsm8k_steps_per_second": 1.406, "step": 500 }, { "epoch": 0.01, "eval_ted_trans_en-hi_accuracy": 0.6839278864595321, "eval_ted_trans_en-hi_loss": 1.142578125, "eval_ted_trans_en-hi_runtime": 2.7736, "eval_ted_trans_en-hi_samples_per_second": 37.135, "eval_ted_trans_en-hi_steps_per_second": 0.721, "step": 500 }, { "epoch": 0.01, "eval_ted_trans_de-ja_accuracy": 0.6501228312605558, "eval_ted_trans_de-ja_loss": 1.5048828125, "eval_ted_trans_de-ja_runtime": 8.2515, "eval_ted_trans_de-ja_samples_per_second": 87.014, "eval_ted_trans_de-ja_steps_per_second": 0.97, "step": 500 }, { "epoch": 0.01, "eval_ted_trans_nl-en_accuracy": 0.7532021898001414, "eval_ted_trans_nl-en_loss": 1.0654296875, "eval_ted_trans_nl-en_runtime": 7.9186, "eval_ted_trans_nl-en_samples_per_second": 97.365, "eval_ted_trans_nl-en_steps_per_second": 1.137, "step": 500 }, { "epoch": 0.01, "eval_ted_trans_en-ja_accuracy": 0.6662950575994054, "eval_ted_trans_en-ja_loss": 1.3916015625, "eval_ted_trans_en-ja_runtime": 9.7107, "eval_ted_trans_en-ja_samples_per_second": 82.486, "eval_ted_trans_en-ja_steps_per_second": 0.927, "step": 500 }, { "epoch": 0.01, "eval_ted_trans_en-es_accuracy": 0.7895431674388482, "eval_ted_trans_en-es_loss": 0.87646484375, "eval_ted_trans_en-es_runtime": 9.3046, "eval_ted_trans_en-es_samples_per_second": 88.774, "eval_ted_trans_en-es_steps_per_second": 0.967, "step": 500 }, { "epoch": 0.01, "eval_ted_trans_en-ms_accuracy": 0.692143238909674, "eval_ted_trans_en-ms_loss": 1.36328125, "eval_ted_trans_en-ms_runtime": 1.0241, "eval_ted_trans_en-ms_samples_per_second": 41.011, "eval_ted_trans_en-ms_steps_per_second": 0.976, "step": 500 }, { "epoch": 0.01, "eval_xsum_accuracy": 0.621155930594, "eval_xsum_loss": 1.4501953125, "eval_xsum_runtime": 440.7691, "eval_xsum_samples_per_second": 92.586, "eval_xsum_steps_per_second": 0.966, "step": 500 }, { "epoch": 0.01, "eval_cnn_dailymail_accuracy": 0.6818918043407839, "eval_cnn_dailymail_loss": NaN, "eval_cnn_dailymail_runtime": 631.5892, "eval_cnn_dailymail_samples_per_second": 90.918, "eval_cnn_dailymail_steps_per_second": 0.948, "step": 500 }, { "epoch": 0.01, "eval_multi_news_accuracy": 0.5512987425377873, "eval_multi_news_loss": NaN, "eval_multi_news_runtime": 102.5343, "eval_multi_news_samples_per_second": 87.727, "eval_multi_news_steps_per_second": 0.917, "step": 500 }, { "epoch": 0.01, "eval_tldr_news_accuracy": 0.5479638860152356, "eval_tldr_news_loss": 2.09375, "eval_tldr_news_runtime": 7.6366, "eval_tldr_news_samples_per_second": 186.994, "eval_tldr_news_steps_per_second": 1.964, "step": 500 }, { "epoch": 0.01, "eval_scitldr_accuracy": 0.4991896272285251, "eval_scitldr_loss": NaN, "eval_scitldr_runtime": 5.9643, "eval_scitldr_samples_per_second": 66.899, "eval_scitldr_steps_per_second": 0.838, "step": 500 }, { "epoch": 0.01, "eval_samsum_accuracy": 0.6392542821992997, "eval_samsum_loss": 1.3603515625, "eval_samsum_runtime": 31.1036, "eval_samsum_samples_per_second": 94.748, "eval_samsum_steps_per_second": 0.997, "step": 500 }, { "epoch": 0.01, "eval_debate_sum_accuracy": 0.9375640253883767, "eval_debate_sum_loss": 0.34521484375, "eval_debate_sum_runtime": 548.5555, "eval_debate_sum_samples_per_second": 87.71, "eval_debate_sum_steps_per_second": 0.915, "step": 500 }, { "epoch": 0.01, "eval_billsum_accuracy": 0.6806867345609693, "eval_billsum_loss": 1.3427734375, "eval_billsum_runtime": 43.496, "eval_billsum_samples_per_second": 87.134, "eval_billsum_steps_per_second": 0.92, "step": 500 }, { "epoch": 0.01, "eval_wmt2019_zh-en_accuracy": 0.6670238429829493, "eval_wmt2019_zh-en_loss": 1.453125, "eval_wmt2019_zh-en_runtime": 28.9371, "eval_wmt2019_zh-en_samples_per_second": 137.574, "eval_wmt2019_zh-en_steps_per_second": 1.451, "step": 500 }, { "epoch": 0.01, "eval_wmt2019_ru-en_accuracy": 0.7587101830765136, "eval_wmt2019_ru-en_loss": 0.92724609375, "eval_wmt2019_ru-en_runtime": 23.7381, "eval_wmt2019_ru-en_samples_per_second": 126.379, "eval_wmt2019_ru-en_steps_per_second": 1.348, "step": 500 }, { "epoch": 0.01, "eval_wmt2019_de-en_accuracy": 0.7675478121558026, "eval_wmt2019_de-en_loss": 0.90478515625, "eval_wmt2019_de-en_runtime": 16.2264, "eval_wmt2019_de-en_samples_per_second": 184.76, "eval_wmt2019_de-en_steps_per_second": 1.972, "step": 500 }, { "epoch": 0.01, "eval_wmt2019_fr-de_accuracy": 0.7500888456249324, "eval_wmt2019_fr-de_loss": 0.99560546875, "eval_wmt2019_fr-de_runtime": 11.5712, "eval_wmt2019_fr-de_samples_per_second": 130.669, "eval_wmt2019_fr-de_steps_per_second": 1.383, "step": 500 }, { "epoch": 0.01, "eval_essay_instruction_accuracy": 0.6002366052672313, "eval_essay_instruction_loss": 1.9189453125, "eval_essay_instruction_runtime": 8.0794, "eval_essay_instruction_samples_per_second": 51.118, "eval_essay_instruction_steps_per_second": 0.619, "step": 500 }, { "epoch": 0.01, "eval_reddit_eli5_accuracy": 0.46082089893518746, "eval_reddit_eli5_loss": 2.4296875, "eval_reddit_eli5_runtime": 602.6271, "eval_reddit_eli5_samples_per_second": 90.482, "eval_reddit_eli5_steps_per_second": 0.943, "step": 500 }, { "epoch": 0.01, "eval_reddit_askh_accuracy": 0.46347532552175574, "eval_reddit_askh_loss": 2.52734375, "eval_reddit_askh_runtime": 245.7671, "eval_reddit_askh_samples_per_second": 80.178, "eval_reddit_askh_steps_per_second": 0.838, "step": 500 }, { "epoch": 0.01, "eval_reddit_asks_accuracy": 0.47150193020881753, "eval_reddit_asks_loss": 2.38671875, "eval_reddit_asks_runtime": 320.7509, "eval_reddit_asks_samples_per_second": 82.17, "eval_reddit_asks_steps_per_second": 0.857, "step": 500 }, { "epoch": 0.02, "learning_rate": 4.5083465988888945e-06, "loss": 1.5195, "step": 510 }, { "epoch": 0.02, "learning_rate": 4.5224842384899045e-06, "loss": 1.492, "step": 520 }, { "epoch": 0.02, "learning_rate": 4.5363510253542444e-06, "loss": 1.5302, "step": 530 }, { "epoch": 0.02, "learning_rate": 4.549957142832593e-06, "loss": 1.5267, "step": 540 }, { "epoch": 0.02, "learning_rate": 4.563312210555719e-06, "loss": 1.565, "step": 550 }, { "epoch": 0.02, "learning_rate": 4.576425325289549e-06, "loss": 1.6208, "step": 560 }, { "epoch": 0.02, "learning_rate": 4.589305098154845e-06, "loss": 1.6341, "step": 570 }, { "epoch": 0.02, "learning_rate": 4.601959688592886e-06, "loss": 1.5639, "step": 580 }, { "epoch": 0.02, "learning_rate": 4.614396835412691e-06, "loss": 1.6218, "step": 590 }, { "epoch": 0.02, "learning_rate": 4.626623885215616e-06, "loss": 1.5995, "step": 600 }, { "epoch": 0.02, "learning_rate": 4.638647818458763e-06, "loss": 1.6176, "step": 610 }, { "epoch": 0.02, "learning_rate": 4.650475273388737e-06, "loss": 1.5944, "step": 620 }, { "epoch": 0.02, "learning_rate": 4.662112568051194e-06, "loss": 1.6074, "step": 630 }, { "epoch": 0.02, "learning_rate": 4.673565720558918e-06, "loss": 1.5783, "step": 640 }, { "epoch": 0.02, "learning_rate": 4.6848404677811685e-06, "loss": 1.5135, "step": 650 }, { "epoch": 0.02, "learning_rate": 4.695942282599635e-06, "loss": 1.6396, "step": 660 }, { "epoch": 0.02, "learning_rate": 4.706876389860915e-06, "loss": 1.6053, "step": 670 }, { "epoch": 0.02, "learning_rate": 4.717647781141908e-06, "loss": 1.5982, "step": 680 }, { "epoch": 0.02, "learning_rate": 4.7282612284325845e-06, "loss": 1.5361, "step": 690 }, { "epoch": 0.02, "learning_rate": 4.738721296830016e-06, "loss": 1.5127, "step": 700 }, { "epoch": 0.02, "learning_rate": 4.749032356328167e-06, "loss": 1.4852, "step": 710 }, { "epoch": 0.02, "learning_rate": 4.759198592779668e-06, "loss": 1.5432, "step": 720 }, { "epoch": 0.02, "learning_rate": 4.769224018098397e-06, "loss": 1.5425, "step": 730 }, { "epoch": 0.02, "learning_rate": 4.7791124797650865e-06, "loss": 1.493, "step": 740 }, { "epoch": 0.02, "learning_rate": 4.788867669692332e-06, "loss": 1.5065, "step": 750 }, { "epoch": 0.02, "eval_gsm8k_hard_accuracy": 0.9174097145881682, "eval_gsm8k_hard_loss": 0.366455078125, "eval_gsm8k_hard_runtime": 6.7984, "eval_gsm8k_hard_samples_per_second": 38.833, "eval_gsm8k_hard_steps_per_second": 0.441, "step": 750 }, { "epoch": 0.02, "eval_webgpt_accuracy": 0.5023221414992414, "eval_webgpt_loss": 2.181640625, "eval_webgpt_runtime": 39.4537, "eval_webgpt_samples_per_second": 99.256, "eval_webgpt_steps_per_second": 1.039, "step": 750 }, { "epoch": 0.02, "eval_squad_v2_accuracy": 0.8977014895925817, "eval_squad_v2_loss": 0.331787109375, "eval_squad_v2_runtime": 214.9281, "eval_squad_v2_samples_per_second": 121.268, "eval_squad_v2_steps_per_second": 1.266, "step": 750 }, { "epoch": 0.02, "eval_adversarial_qa_accuracy": 0.8063639891346527, "eval_adversarial_qa_loss": 0.8232421875, "eval_adversarial_qa_runtime": 51.9182, "eval_adversarial_qa_samples_per_second": 115.567, "eval_adversarial_qa_steps_per_second": 1.213, "step": 750 }, { "epoch": 0.02, "eval_private_tuning_accuracy": 0.6775308778615678, "eval_private_tuning_loss": 1.1962890625, "eval_private_tuning_runtime": 149.5702, "eval_private_tuning_samples_per_second": 141.592, "eval_private_tuning_steps_per_second": 1.478, "step": 750 }, { "epoch": 0.02, "eval_oa_translated_accuracy": 0.6986534506008611, "eval_oa_translated_loss": 1.22265625, "eval_oa_translated_runtime": 1324.5514, "eval_oa_translated_samples_per_second": 89.655, "eval_oa_translated_steps_per_second": 0.935, "step": 750 }, { "epoch": 0.02, "eval_prosocial_dialogue_accuracy": 0.5327101026505052, "eval_prosocial_dialogue_loss": 1.7802734375, "eval_prosocial_dialogue_runtime": 70.7166, "eval_prosocial_dialogue_samples_per_second": 381.565, "eval_prosocial_dialogue_steps_per_second": 3.988, "step": 750 }, { "epoch": 0.02, "eval_math_qa_accuracy": 0.5798378605476227, "eval_math_qa_loss": 1.826171875, "eval_math_qa_runtime": 44.6748, "eval_math_qa_samples_per_second": 133.588, "eval_math_qa_steps_per_second": 1.41, "step": 750 }, { "epoch": 0.02, "eval_wikihow_accuracy": 0.6193731798640966, "eval_wikihow_loss": 1.802734375, "eval_wikihow_runtime": 16.8626, "eval_wikihow_samples_per_second": 135.981, "eval_wikihow_steps_per_second": 1.423, "step": 750 }, { "epoch": 0.02, "eval_joke_accuracy": 0.5020849128127369, "eval_joke_loss": 2.1640625, "eval_joke_runtime": 1.3597, "eval_joke_samples_per_second": 55.896, "eval_joke_steps_per_second": 0.735, "step": 750 }, { "epoch": 0.02, "eval_gsm8k_accuracy": 0.760008955934006, "eval_gsm8k_loss": 0.9189453125, "eval_gsm8k_runtime": 12.0443, "eval_gsm8k_samples_per_second": 124.126, "eval_gsm8k_steps_per_second": 1.328, "step": 750 }, { "epoch": 0.02, "eval_ted_trans_en-hi_accuracy": 0.6714796661809511, "eval_ted_trans_en-hi_loss": 1.2548828125, "eval_ted_trans_en-hi_runtime": 2.3695, "eval_ted_trans_en-hi_samples_per_second": 43.47, "eval_ted_trans_en-hi_steps_per_second": 0.844, "step": 750 }, { "epoch": 0.02, "eval_ted_trans_de-ja_accuracy": 0.6580367185861629, "eval_ted_trans_de-ja_loss": 1.466796875, "eval_ted_trans_de-ja_runtime": 9.4824, "eval_ted_trans_de-ja_samples_per_second": 75.72, "eval_ted_trans_de-ja_steps_per_second": 0.844, "step": 750 }, { "epoch": 0.02, "eval_ted_trans_nl-en_accuracy": 0.749224515991015, "eval_ted_trans_nl-en_loss": 1.080078125, "eval_ted_trans_nl-en_runtime": 8.4451, "eval_ted_trans_nl-en_samples_per_second": 91.296, "eval_ted_trans_nl-en_steps_per_second": 1.066, "step": 750 }, { "epoch": 0.02, "eval_ted_trans_en-ja_accuracy": 0.6621738060068931, "eval_ted_trans_en-ja_loss": 1.384765625, "eval_ted_trans_en-ja_runtime": 10.0893, "eval_ted_trans_en-ja_samples_per_second": 79.391, "eval_ted_trans_en-ja_steps_per_second": 0.892, "step": 750 }, { "epoch": 0.02, "eval_ted_trans_en-es_accuracy": 0.793457991028678, "eval_ted_trans_en-es_loss": 0.85205078125, "eval_ted_trans_en-es_runtime": 7.1771, "eval_ted_trans_en-es_samples_per_second": 115.088, "eval_ted_trans_en-es_steps_per_second": 1.254, "step": 750 }, { "epoch": 0.02, "eval_ted_trans_en-ms_accuracy": 0.6782511210762332, "eval_ted_trans_en-ms_loss": 1.4208984375, "eval_ted_trans_en-ms_runtime": 2.0842, "eval_ted_trans_en-ms_samples_per_second": 20.151, "eval_ted_trans_en-ms_steps_per_second": 0.48, "step": 750 }, { "epoch": 0.02, "eval_xsum_accuracy": 0.6225130561751453, "eval_xsum_loss": 1.4462890625, "eval_xsum_runtime": 439.444, "eval_xsum_samples_per_second": 92.865, "eval_xsum_steps_per_second": 0.969, "step": 750 }, { "epoch": 0.02, "eval_cnn_dailymail_accuracy": 0.6778638530242029, "eval_cnn_dailymail_loss": NaN, "eval_cnn_dailymail_runtime": 633.9568, "eval_cnn_dailymail_samples_per_second": 90.579, "eval_cnn_dailymail_steps_per_second": 0.945, "step": 750 }, { "epoch": 0.02, "eval_multi_news_accuracy": 0.5553791439095643, "eval_multi_news_loss": NaN, "eval_multi_news_runtime": 103.0316, "eval_multi_news_samples_per_second": 87.303, "eval_multi_news_steps_per_second": 0.912, "step": 750 }, { "epoch": 0.02, "eval_tldr_news_accuracy": 0.5591084360011286, "eval_tldr_news_loss": 1.9892578125, "eval_tldr_news_runtime": 8.3367, "eval_tldr_news_samples_per_second": 171.29, "eval_tldr_news_steps_per_second": 1.799, "step": 750 }, { "epoch": 0.02, "eval_scitldr_accuracy": 0.49270664505672607, "eval_scitldr_loss": NaN, "eval_scitldr_runtime": 5.8517, "eval_scitldr_samples_per_second": 68.186, "eval_scitldr_steps_per_second": 0.854, "step": 750 }, { "epoch": 0.02, "eval_samsum_accuracy": 0.6411875245035082, "eval_samsum_loss": 1.32421875, "eval_samsum_runtime": 32.2591, "eval_samsum_samples_per_second": 91.354, "eval_samsum_steps_per_second": 0.961, "step": 750 }, { "epoch": 0.02, "eval_debate_sum_accuracy": 0.9381249710591028, "eval_debate_sum_loss": 0.337646484375, "eval_debate_sum_runtime": 548.1225, "eval_debate_sum_samples_per_second": 87.78, "eval_debate_sum_steps_per_second": 0.916, "step": 750 }, { "epoch": 0.02, "eval_billsum_accuracy": 0.6810246806233696, "eval_billsum_loss": 1.3359375, "eval_billsum_runtime": 50.0247, "eval_billsum_samples_per_second": 75.763, "eval_billsum_steps_per_second": 0.8, "step": 750 }, { "epoch": 0.02, "eval_wmt2019_zh-en_accuracy": 0.6683125468349724, "eval_wmt2019_zh-en_loss": 1.451171875, "eval_wmt2019_zh-en_runtime": 27.2087, "eval_wmt2019_zh-en_samples_per_second": 146.313, "eval_wmt2019_zh-en_steps_per_second": 1.544, "step": 750 }, { "epoch": 0.02, "eval_wmt2019_ru-en_accuracy": 0.755552089368213, "eval_wmt2019_ru-en_loss": 0.94091796875, "eval_wmt2019_ru-en_runtime": 20.7954, "eval_wmt2019_ru-en_samples_per_second": 144.262, "eval_wmt2019_ru-en_steps_per_second": 1.539, "step": 750 }, { "epoch": 0.02, "eval_wmt2019_de-en_accuracy": 0.7641599453590333, "eval_wmt2019_de-en_loss": 0.9228515625, "eval_wmt2019_de-en_runtime": 15.5528, "eval_wmt2019_de-en_samples_per_second": 192.762, "eval_wmt2019_de-en_steps_per_second": 2.058, "step": 750 }, { "epoch": 0.02, "eval_wmt2019_fr-de_accuracy": 0.7474449624849476, "eval_wmt2019_fr-de_loss": 1.00390625, "eval_wmt2019_fr-de_runtime": 11.5093, "eval_wmt2019_fr-de_samples_per_second": 131.372, "eval_wmt2019_fr-de_steps_per_second": 1.39, "step": 750 }, { "epoch": 0.02, "eval_essay_instruction_accuracy": 0.6032218119098689, "eval_essay_instruction_loss": 1.904296875, "eval_essay_instruction_runtime": 7.606, "eval_essay_instruction_samples_per_second": 54.299, "eval_essay_instruction_steps_per_second": 0.657, "step": 750 }, { "epoch": 0.02, "eval_reddit_eli5_accuracy": 0.4612608360817972, "eval_reddit_eli5_loss": 2.431640625, "eval_reddit_eli5_runtime": 597.1988, "eval_reddit_eli5_samples_per_second": 91.305, "eval_reddit_eli5_steps_per_second": 0.951, "step": 750 }, { "epoch": 0.02, "eval_reddit_askh_accuracy": 0.46371300245115404, "eval_reddit_askh_loss": 2.525390625, "eval_reddit_askh_runtime": 253.0373, "eval_reddit_askh_samples_per_second": 77.874, "eval_reddit_askh_steps_per_second": 0.814, "step": 750 }, { "epoch": 0.02, "eval_reddit_asks_accuracy": 0.47195547000535765, "eval_reddit_asks_loss": 2.388671875, "eval_reddit_asks_runtime": 304.1555, "eval_reddit_asks_samples_per_second": 86.653, "eval_reddit_asks_steps_per_second": 0.904, "step": 750 }, { "epoch": 0.02, "learning_rate": 4.798493132500121e-06, "loss": 1.5526, "step": 760 }, { "epoch": 0.02, "learning_rate": 4.8079922732483016e-06, "loss": 1.4845, "step": 770 }, { "epoch": 0.02, "learning_rate": 4.817368364668191e-06, "loss": 1.5351, "step": 780 }, { "epoch": 0.02, "learning_rate": 4.8266245539317745e-06, "loss": 1.5942, "step": 790 }, { "epoch": 0.02, "learning_rate": 4.835763868993521e-06, "loss": 1.4886, "step": 800 }, { "epoch": 0.02, "learning_rate": 4.844789224536785e-06, "loss": 1.5645, "step": 810 }, { "epoch": 0.02, "learning_rate": 4.853703427554027e-06, "loss": 1.5099, "step": 820 }, { "epoch": 0.02, "learning_rate": 4.862509182587578e-06, "loss": 1.619, "step": 830 }, { "epoch": 0.02, "learning_rate": 4.871209096655434e-06, "loss": 1.542, "step": 840 }, { "epoch": 0.03, "learning_rate": 4.879805683884512e-06, "loss": 1.5254, "step": 850 }, { "epoch": 0.03, "learning_rate": 4.888301369871998e-06, "loss": 1.5427, "step": 860 }, { "epoch": 0.03, "learning_rate": 4.8966984957936845e-06, "loss": 1.5403, "step": 870 }, { "epoch": 0.03, "learning_rate": 4.904999322276735e-06, "loss": 1.5848, "step": 880 }, { "epoch": 0.03, "learning_rate": 4.913206033052878e-06, "loss": 1.5205, "step": 890 }, { "epoch": 0.03, "learning_rate": 4.921320738406821e-06, "loss": 1.5359, "step": 900 }, { "epoch": 0.03, "learning_rate": 4.929345478433492e-06, "loss": 1.5631, "step": 910 }, { "epoch": 0.03, "learning_rate": 4.937282226116702e-06, "loss": 1.5928, "step": 920 }, { "epoch": 0.03, "learning_rate": 4.945132890240829e-06, "loss": 1.4707, "step": 930 }, { "epoch": 0.03, "learning_rate": 4.952899318146298e-06, "loss": 1.5279, "step": 940 }, { "epoch": 0.03, "learning_rate": 4.96058329833879e-06, "loss": 1.5411, "step": 950 }, { "epoch": 0.03, "learning_rate": 4.968186562961406e-06, "loss": 1.6029, "step": 960 }, { "epoch": 0.03, "learning_rate": 4.975710790138337e-06, "loss": 1.648, "step": 970 }, { "epoch": 0.03, "learning_rate": 4.9831576061979556e-06, "loss": 1.5799, "step": 980 }, { "epoch": 0.03, "learning_rate": 4.990528587782728e-06, "loss": 1.5592, "step": 990 }, { "epoch": 0.03, "learning_rate": 4.99782526385276e-06, "loss": 1.6317, "step": 1000 }, { "epoch": 0.03, "eval_gsm8k_hard_accuracy": 0.9242294694841415, "eval_gsm8k_hard_loss": 0.337158203125, "eval_gsm8k_hard_runtime": 4.4214, "eval_gsm8k_hard_samples_per_second": 59.709, "eval_gsm8k_hard_steps_per_second": 0.679, "step": 1000 }, { "epoch": 0.03, "eval_webgpt_accuracy": 0.5016719087887306, "eval_webgpt_loss": 2.181640625, "eval_webgpt_runtime": 36.3649, "eval_webgpt_samples_per_second": 107.686, "eval_webgpt_steps_per_second": 1.127, "step": 1000 }, { "epoch": 0.03, "eval_squad_v2_accuracy": 0.9092151805972463, "eval_squad_v2_loss": 0.35546875, "eval_squad_v2_runtime": 216.2111, "eval_squad_v2_samples_per_second": 120.549, "eval_squad_v2_steps_per_second": 1.258, "step": 1000 }, { "epoch": 0.03, "eval_adversarial_qa_accuracy": 0.8333781081161756, "eval_adversarial_qa_loss": 0.83642578125, "eval_adversarial_qa_runtime": 52.2959, "eval_adversarial_qa_samples_per_second": 114.732, "eval_adversarial_qa_steps_per_second": 1.205, "step": 1000 }, { "epoch": 0.03, "eval_private_tuning_accuracy": 0.6788522917969135, "eval_private_tuning_loss": 1.1845703125, "eval_private_tuning_runtime": 145.6236, "eval_private_tuning_samples_per_second": 145.43, "eval_private_tuning_steps_per_second": 1.518, "step": 1000 }, { "epoch": 0.03, "eval_oa_translated_accuracy": 0.7015835515263078, "eval_oa_translated_loss": 1.208984375, "eval_oa_translated_runtime": 1331.5436, "eval_oa_translated_samples_per_second": 89.184, "eval_oa_translated_steps_per_second": 0.93, "step": 1000 }, { "epoch": 0.03, "eval_prosocial_dialogue_accuracy": 0.5440172516743936, "eval_prosocial_dialogue_loss": 1.7470703125, "eval_prosocial_dialogue_runtime": 66.8792, "eval_prosocial_dialogue_samples_per_second": 403.459, "eval_prosocial_dialogue_steps_per_second": 4.217, "step": 1000 }, { "epoch": 0.03, "eval_math_qa_accuracy": 0.5903696634283728, "eval_math_qa_loss": 1.7734375, "eval_math_qa_runtime": 43.3917, "eval_math_qa_samples_per_second": 137.538, "eval_math_qa_steps_per_second": 1.452, "step": 1000 }, { "epoch": 0.03, "eval_wikihow_accuracy": 0.6181528220773818, "eval_wikihow_loss": 1.79296875, "eval_wikihow_runtime": 16.8686, "eval_wikihow_samples_per_second": 135.933, "eval_wikihow_steps_per_second": 1.423, "step": 1000 }, { "epoch": 0.03, "eval_joke_accuracy": 0.5162054586808188, "eval_joke_loss": 2.095703125, "eval_joke_runtime": 1.1499, "eval_joke_samples_per_second": 66.094, "eval_joke_steps_per_second": 0.87, "step": 1000 }, { "epoch": 0.03, "eval_gsm8k_accuracy": 0.7709449909740977, "eval_gsm8k_loss": 0.8671875, "eval_gsm8k_runtime": 11.5578, "eval_gsm8k_samples_per_second": 129.35, "eval_gsm8k_steps_per_second": 1.384, "step": 1000 }, { "epoch": 0.03, "eval_ted_trans_en-hi_accuracy": 0.6727249123718032, "eval_ted_trans_en-hi_loss": 1.2490234375, "eval_ted_trans_en-hi_runtime": 3.3114, "eval_ted_trans_en-hi_samples_per_second": 31.105, "eval_ted_trans_en-hi_steps_per_second": 0.604, "step": 1000 }, { "epoch": 0.03, "eval_ted_trans_de-ja_accuracy": 0.6605206483545547, "eval_ted_trans_de-ja_loss": 1.4599609375, "eval_ted_trans_de-ja_runtime": 8.583, "eval_ted_trans_de-ja_samples_per_second": 83.654, "eval_ted_trans_de-ja_steps_per_second": 0.932, "step": 1000 }, { "epoch": 0.03, "eval_ted_trans_nl-en_accuracy": 0.757177992835374, "eval_ted_trans_nl-en_loss": 1.0478515625, "eval_ted_trans_nl-en_runtime": 8.7612, "eval_ted_trans_nl-en_samples_per_second": 88.002, "eval_ted_trans_nl-en_steps_per_second": 1.027, "step": 1000 }, { "epoch": 0.03, "eval_ted_trans_en-ja_accuracy": 0.6644915715062534, "eval_ted_trans_en-ja_loss": 1.3798828125, "eval_ted_trans_en-ja_runtime": 9.6809, "eval_ted_trans_en-ja_samples_per_second": 82.74, "eval_ted_trans_en-ja_steps_per_second": 0.93, "step": 1000 }, { "epoch": 0.03, "eval_ted_trans_en-es_accuracy": 0.7831230683487865, "eval_ted_trans_en-es_loss": 0.89501953125, "eval_ted_trans_en-es_runtime": 8.1422, "eval_ted_trans_en-es_samples_per_second": 101.447, "eval_ted_trans_en-es_steps_per_second": 1.105, "step": 1000 }, { "epoch": 0.03, "eval_ted_trans_en-ms_accuracy": 0.6917040358744395, "eval_ted_trans_en-ms_loss": 1.3955078125, "eval_ted_trans_en-ms_runtime": 0.7332, "eval_ted_trans_en-ms_samples_per_second": 57.285, "eval_ted_trans_en-ms_steps_per_second": 1.364, "step": 1000 }, { "epoch": 0.03, "eval_xsum_accuracy": 0.6225837900623918, "eval_xsum_loss": 1.4453125, "eval_xsum_runtime": 443.13, "eval_xsum_samples_per_second": 92.093, "eval_xsum_steps_per_second": 0.961, "step": 1000 }, { "epoch": 0.03, "eval_cnn_dailymail_accuracy": 0.6811569253551761, "eval_cnn_dailymail_loss": NaN, "eval_cnn_dailymail_runtime": 634.279, "eval_cnn_dailymail_samples_per_second": 90.533, "eval_cnn_dailymail_steps_per_second": 0.944, "step": 1000 }, { "epoch": 0.03, "eval_multi_news_accuracy": 0.5572843896862695, "eval_multi_news_loss": NaN, "eval_multi_news_runtime": 104.4536, "eval_multi_news_samples_per_second": 86.115, "eval_multi_news_steps_per_second": 0.9, "step": 1000 }, { "epoch": 0.03, "eval_tldr_news_accuracy": 0.5934825543120474, "eval_tldr_news_loss": 1.779296875, "eval_tldr_news_runtime": 7.875, "eval_tldr_news_samples_per_second": 181.334, "eval_tldr_news_steps_per_second": 1.905, "step": 1000 }, { "epoch": 0.03, "eval_scitldr_accuracy": 0.49756888168557534, "eval_scitldr_loss": NaN, "eval_scitldr_runtime": 5.4836, "eval_scitldr_samples_per_second": 72.763, "eval_scitldr_steps_per_second": 0.912, "step": 1000 }, { "epoch": 0.03, "eval_samsum_accuracy": 0.6441076667252498, "eval_samsum_loss": 1.3203125, "eval_samsum_runtime": 31.9228, "eval_samsum_samples_per_second": 92.317, "eval_samsum_steps_per_second": 0.971, "step": 1000 }, { "epoch": 0.03, "eval_debate_sum_accuracy": 0.9393930900916929, "eval_debate_sum_loss": 0.327392578125, "eval_debate_sum_runtime": 546.4353, "eval_debate_sum_samples_per_second": 88.051, "eval_debate_sum_steps_per_second": 0.919, "step": 1000 }, { "epoch": 0.03, "eval_billsum_accuracy": 0.6859647270039075, "eval_billsum_loss": 1.3212890625, "eval_billsum_runtime": 47.3064, "eval_billsum_samples_per_second": 80.116, "eval_billsum_steps_per_second": 0.846, "step": 1000 }, { "epoch": 0.03, "eval_wmt2019_zh-en_accuracy": 0.6666222464280335, "eval_wmt2019_zh-en_loss": 1.4609375, "eval_wmt2019_zh-en_runtime": 27.4142, "eval_wmt2019_zh-en_samples_per_second": 145.217, "eval_wmt2019_zh-en_steps_per_second": 1.532, "step": 1000 }, { "epoch": 0.03, "eval_wmt2019_ru-en_accuracy": 0.7586163428740916, "eval_wmt2019_ru-en_loss": 0.93212890625, "eval_wmt2019_ru-en_runtime": 22.6757, "eval_wmt2019_ru-en_samples_per_second": 132.3, "eval_wmt2019_ru-en_steps_per_second": 1.411, "step": 1000 }, { "epoch": 0.03, "eval_wmt2019_de-en_accuracy": 0.7644713185146496, "eval_wmt2019_de-en_loss": 0.92724609375, "eval_wmt2019_de-en_runtime": 15.456, "eval_wmt2019_de-en_samples_per_second": 193.97, "eval_wmt2019_de-en_steps_per_second": 2.07, "step": 1000 }, { "epoch": 0.03, "eval_wmt2019_fr-de_accuracy": 0.7478946231915353, "eval_wmt2019_fr-de_loss": 1.0068359375, "eval_wmt2019_fr-de_runtime": 10.3196, "eval_wmt2019_fr-de_samples_per_second": 146.518, "eval_wmt2019_fr-de_steps_per_second": 1.55, "step": 1000 }, { "epoch": 0.03, "eval_essay_instruction_accuracy": 0.6048415629215222, "eval_essay_instruction_loss": 1.8955078125, "eval_essay_instruction_runtime": 9.0231, "eval_essay_instruction_samples_per_second": 45.771, "eval_essay_instruction_steps_per_second": 0.554, "step": 1000 }, { "epoch": 0.03, "eval_reddit_eli5_accuracy": 0.4608785206404607, "eval_reddit_eli5_loss": 2.4296875, "eval_reddit_eli5_runtime": 597.6745, "eval_reddit_eli5_samples_per_second": 91.232, "eval_reddit_eli5_steps_per_second": 0.95, "step": 1000 }, { "epoch": 0.03, "eval_reddit_askh_accuracy": 0.4638486660338061, "eval_reddit_askh_loss": 2.5234375, "eval_reddit_askh_runtime": 248.9187, "eval_reddit_askh_samples_per_second": 79.162, "eval_reddit_askh_steps_per_second": 0.828, "step": 1000 }, { "epoch": 0.03, "eval_reddit_asks_accuracy": 0.4715865474658219, "eval_reddit_asks_loss": 2.384765625, "eval_reddit_asks_runtime": 310.7333, "eval_reddit_asks_samples_per_second": 84.819, "eval_reddit_asks_steps_per_second": 0.885, "step": 1000 } ], "max_steps": 67822, "num_train_epochs": 2, "total_flos": 1.7293861155088892e+19, "trial_name": null, "trial_params": null }