pythia-3b-deduped-sft / trainer_state.json
theblackcat102's picture
Upload 6 files
11909cf
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.13174555541730404,
"global_step": 2250,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.666666666666667e-06,
"loss": 2.1425,
"step": 10
},
{
"epoch": 0.0,
"learning_rate": 2.1683833261066357e-06,
"loss": 2.0497,
"step": 20
},
{
"epoch": 0.0,
"learning_rate": 2.4618687578661045e-06,
"loss": 1.8724,
"step": 30
},
{
"epoch": 0.0,
"learning_rate": 2.6700999855466042e-06,
"loss": 1.8389,
"step": 40
},
{
"epoch": 0.0,
"learning_rate": 2.8316166738933647e-06,
"loss": 1.774,
"step": 50
},
{
"epoch": 0.0,
"learning_rate": 2.963585417306073e-06,
"loss": 1.7948,
"step": 60
},
{
"epoch": 0.0,
"learning_rate": 3.075163400023762e-06,
"loss": 1.7367,
"step": 70
},
{
"epoch": 0.0,
"learning_rate": 3.171816644986573e-06,
"loss": 1.7413,
"step": 80
},
{
"epoch": 0.0,
"learning_rate": 3.257070849065542e-06,
"loss": 1.7729,
"step": 90
},
{
"epoch": 0.0,
"learning_rate": 3.333333333333334e-06,
"loss": 1.7465,
"step": 100
},
{
"epoch": 0.01,
"learning_rate": 3.402321141930376e-06,
"loss": 1.7541,
"step": 110
},
{
"epoch": 0.01,
"learning_rate": 3.4653020767460416e-06,
"loss": 1.7026,
"step": 120
},
{
"epoch": 0.01,
"learning_rate": 3.523238920511395e-06,
"loss": 1.7366,
"step": 130
},
{
"epoch": 0.01,
"learning_rate": 3.5768800594637304e-06,
"loss": 1.6725,
"step": 140
},
{
"epoch": 0.01,
"learning_rate": 3.626818765092802e-06,
"loss": 1.6984,
"step": 150
},
{
"epoch": 0.01,
"learning_rate": 3.6735333044265414e-06,
"loss": 1.6792,
"step": 160
},
{
"epoch": 0.01,
"learning_rate": 3.717414868963791e-06,
"loss": 1.6731,
"step": 170
},
{
"epoch": 0.01,
"learning_rate": 3.7587875085055104e-06,
"loss": 1.7055,
"step": 180
},
{
"epoch": 0.01,
"learning_rate": 3.7979226682547152e-06,
"loss": 1.6812,
"step": 190
},
{
"epoch": 0.01,
"learning_rate": 3.835049992773302e-06,
"loss": 1.624,
"step": 200
},
{
"epoch": 0.01,
"learning_rate": 3.870365491223199e-06,
"loss": 1.6899,
"step": 210
},
{
"epoch": 0.01,
"learning_rate": 3.9040378013703444e-06,
"loss": 1.7062,
"step": 220
},
{
"epoch": 0.01,
"learning_rate": 3.936213060029322e-06,
"loss": 1.6366,
"step": 230
},
{
"epoch": 0.01,
"learning_rate": 3.96701873618601e-06,
"loss": 1.6621,
"step": 240
},
{
"epoch": 0.01,
"learning_rate": 3.996566681120062e-06,
"loss": 1.7629,
"step": 250
},
{
"epoch": 0.01,
"eval_gsm8k_hard_accuracy": 0.8147876715117462,
"eval_gsm8k_hard_loss": 0.76171875,
"eval_gsm8k_hard_runtime": 2.971,
"eval_gsm8k_hard_samples_per_second": 88.859,
"eval_gsm8k_hard_steps_per_second": 2.02,
"step": 250
},
{
"epoch": 0.01,
"eval_webgpt_accuracy": 0.48539789112821163,
"eval_webgpt_loss": 2.314453125,
"eval_webgpt_runtime": 16.0854,
"eval_webgpt_samples_per_second": 243.451,
"eval_webgpt_steps_per_second": 5.098,
"step": 250
},
{
"epoch": 0.01,
"eval_squad_v2_accuracy": 0.8767175411152919,
"eval_squad_v2_loss": 0.428466796875,
"eval_squad_v2_runtime": 89.9878,
"eval_squad_v2_samples_per_second": 289.639,
"eval_squad_v2_steps_per_second": 6.034,
"step": 250
},
{
"epoch": 0.01,
"eval_adversarial_qa_accuracy": 0.7944240470433719,
"eval_adversarial_qa_loss": 0.986328125,
"eval_adversarial_qa_runtime": 22.0494,
"eval_adversarial_qa_samples_per_second": 272.116,
"eval_adversarial_qa_steps_per_second": 5.669,
"step": 250
},
{
"epoch": 0.01,
"eval_private_tuning_accuracy": 0.6531239810829162,
"eval_private_tuning_loss": 1.337890625,
"eval_private_tuning_runtime": 61.5659,
"eval_private_tuning_samples_per_second": 343.989,
"eval_private_tuning_steps_per_second": 7.179,
"step": 250
},
{
"epoch": 0.01,
"eval_oa_translated_accuracy": 0.6721688366960878,
"eval_oa_translated_loss": 1.3828125,
"eval_oa_translated_runtime": 712.3024,
"eval_oa_translated_samples_per_second": 196.356,
"eval_oa_translated_steps_per_second": 4.091,
"step": 250
},
{
"epoch": 0.01,
"eval_prosocial_dialogue_accuracy": 0.4919427530416725,
"eval_prosocial_dialogue_loss": 1.9140625,
"eval_prosocial_dialogue_runtime": 91.8198,
"eval_prosocial_dialogue_samples_per_second": 293.869,
"eval_prosocial_dialogue_steps_per_second": 6.132,
"step": 250
},
{
"epoch": 0.01,
"eval_math_qa_accuracy": 0.540921279895434,
"eval_math_qa_loss": 2.076171875,
"eval_math_qa_runtime": 19.0288,
"eval_math_qa_samples_per_second": 313.63,
"eval_math_qa_steps_per_second": 6.569,
"step": 250
},
{
"epoch": 0.01,
"eval_wikihow_accuracy": 0.5926709194286507,
"eval_wikihow_loss": 2.01171875,
"eval_wikihow_runtime": 7.434,
"eval_wikihow_samples_per_second": 308.448,
"eval_wikihow_steps_per_second": 6.457,
"step": 250
},
{
"epoch": 0.01,
"eval_joke_accuracy": 0.4775398028809704,
"eval_joke_loss": 2.380859375,
"eval_joke_runtime": 0.5994,
"eval_joke_samples_per_second": 126.794,
"eval_joke_steps_per_second": 3.337,
"step": 250
},
{
"epoch": 0.01,
"eval_gsm8k_accuracy": 0.7411105358167392,
"eval_gsm8k_loss": 1.0078125,
"eval_gsm8k_runtime": 6.2732,
"eval_gsm8k_samples_per_second": 238.315,
"eval_gsm8k_steps_per_second": 5.101,
"step": 250
},
{
"epoch": 0.01,
"eval_ted_trans_en-hi_accuracy": 0.6262273619899629,
"eval_ted_trans_en-hi_loss": 1.46875,
"eval_ted_trans_en-hi_runtime": 1.1364,
"eval_ted_trans_en-hi_samples_per_second": 90.639,
"eval_ted_trans_en-hi_steps_per_second": 2.64,
"step": 250
},
{
"epoch": 0.01,
"eval_ted_trans_de-ja_accuracy": 0.6054150683400991,
"eval_ted_trans_de-ja_loss": 1.76953125,
"eval_ted_trans_de-ja_runtime": 3.475,
"eval_ted_trans_de-ja_samples_per_second": 206.617,
"eval_ted_trans_de-ja_steps_per_second": 4.317,
"step": 250
},
{
"epoch": 0.01,
"eval_ted_trans_nl-en_accuracy": 0.7055143773327025,
"eval_ted_trans_nl-en_loss": 1.3232421875,
"eval_ted_trans_nl-en_runtime": 4.3836,
"eval_ted_trans_nl-en_samples_per_second": 175.884,
"eval_ted_trans_nl-en_steps_per_second": 3.878,
"step": 250
},
{
"epoch": 0.01,
"eval_ted_trans_en-ja_accuracy": 0.6165139119558755,
"eval_ted_trans_en-ja_loss": 1.685546875,
"eval_ted_trans_en-ja_runtime": 3.9868,
"eval_ted_trans_en-ja_samples_per_second": 200.912,
"eval_ted_trans_en-ja_steps_per_second": 4.264,
"step": 250
},
{
"epoch": 0.01,
"eval_ted_trans_en-es_accuracy": 0.7508869040130834,
"eval_ted_trans_en-es_loss": 1.0810546875,
"eval_ted_trans_en-es_runtime": 4.6601,
"eval_ted_trans_en-es_samples_per_second": 177.248,
"eval_ted_trans_en-es_steps_per_second": 3.863,
"step": 250
},
{
"epoch": 0.01,
"eval_ted_trans_en-ms_accuracy": 0.6461456102783726,
"eval_ted_trans_en-ms_loss": 1.6650390625,
"eval_ted_trans_en-ms_runtime": 0.4654,
"eval_ted_trans_en-ms_samples_per_second": 90.238,
"eval_ted_trans_en-ms_steps_per_second": 2.149,
"step": 250
},
{
"epoch": 0.01,
"eval_xsum_accuracy": 0.5982287223480477,
"eval_xsum_loss": NaN,
"eval_xsum_runtime": 192.75,
"eval_xsum_samples_per_second": 211.72,
"eval_xsum_steps_per_second": 4.415,
"step": 250
},
{
"epoch": 0.01,
"eval_cnn_dailymail_accuracy": 0.6676366328844463,
"eval_cnn_dailymail_loss": NaN,
"eval_cnn_dailymail_runtime": 274.4306,
"eval_cnn_dailymail_samples_per_second": 209.244,
"eval_cnn_dailymail_steps_per_second": 4.362,
"step": 250
},
{
"epoch": 0.01,
"eval_multi_news_accuracy": 0.5282928997840721,
"eval_multi_news_loss": NaN,
"eval_multi_news_runtime": 46.6919,
"eval_multi_news_samples_per_second": 192.646,
"eval_multi_news_steps_per_second": 4.026,
"step": 250
},
{
"epoch": 0.01,
"eval_tldr_news_accuracy": 0.5294836828740713,
"eval_tldr_news_loss": 2.2265625,
"eval_tldr_news_runtime": 3.3342,
"eval_tldr_news_samples_per_second": 428.294,
"eval_tldr_news_steps_per_second": 8.998,
"step": 250
},
{
"epoch": 0.01,
"eval_scitldr_accuracy": 0.49432739059967584,
"eval_scitldr_loss": NaN,
"eval_scitldr_runtime": 2.6633,
"eval_scitldr_samples_per_second": 149.816,
"eval_scitldr_steps_per_second": 3.379,
"step": 250
},
{
"epoch": 0.01,
"eval_samsum_accuracy": 0.6079303492003407,
"eval_samsum_loss": 1.4892578125,
"eval_samsum_runtime": 14.0655,
"eval_samsum_samples_per_second": 209.52,
"eval_samsum_steps_per_second": 4.408,
"step": 250
},
{
"epoch": 0.01,
"eval_debate_sum_accuracy": 0.9296994127333449,
"eval_debate_sum_loss": NaN,
"eval_debate_sum_runtime": 250.1193,
"eval_debate_sum_samples_per_second": 192.364,
"eval_debate_sum_steps_per_second": 4.01,
"step": 250
},
{
"epoch": 0.01,
"eval_billsum_accuracy": 0.6637741667488066,
"eval_billsum_loss": 1.4560546875,
"eval_billsum_runtime": 20.9773,
"eval_billsum_samples_per_second": 180.672,
"eval_billsum_steps_per_second": 3.766,
"step": 250
},
{
"epoch": 0.01,
"eval_wmt2019_zh-en_accuracy": 0.6277540662129427,
"eval_wmt2019_zh-en_loss": 1.6943359375,
"eval_wmt2019_zh-en_runtime": 10.9759,
"eval_wmt2019_zh-en_samples_per_second": 362.702,
"eval_wmt2019_zh-en_steps_per_second": 7.562,
"step": 250
},
{
"epoch": 0.01,
"eval_wmt2019_ru-en_accuracy": 0.721860857670872,
"eval_wmt2019_ru-en_loss": 1.11328125,
"eval_wmt2019_ru-en_runtime": 10.5639,
"eval_wmt2019_ru-en_samples_per_second": 283.986,
"eval_wmt2019_ru-en_steps_per_second": 5.964,
"step": 250
},
{
"epoch": 0.01,
"eval_wmt2019_de-en_accuracy": 0.7348780028355294,
"eval_wmt2019_de-en_loss": 1.083984375,
"eval_wmt2019_de-en_runtime": 7.6263,
"eval_wmt2019_de-en_samples_per_second": 393.113,
"eval_wmt2019_de-en_steps_per_second": 8.261,
"step": 250
},
{
"epoch": 0.01,
"eval_wmt2019_fr-de_accuracy": 0.7198844756374025,
"eval_wmt2019_fr-de_loss": 1.1572265625,
"eval_wmt2019_fr-de_runtime": 5.8183,
"eval_wmt2019_fr-de_samples_per_second": 259.868,
"eval_wmt2019_fr-de_steps_per_second": 5.5,
"step": 250
},
{
"epoch": 0.01,
"eval_essay_instruction_accuracy": 0.5889370453088031,
"eval_essay_instruction_loss": 2.01171875,
"eval_essay_instruction_runtime": 4.9645,
"eval_essay_instruction_samples_per_second": 83.191,
"eval_essay_instruction_steps_per_second": 1.813,
"step": 250
},
{
"epoch": 0.01,
"eval_reddit_eli5_accuracy": 0.44436461029042645,
"eval_reddit_eli5_loss": 2.56640625,
"eval_reddit_eli5_runtime": 290.2795,
"eval_reddit_eli5_samples_per_second": 187.843,
"eval_reddit_eli5_steps_per_second": 3.913,
"step": 250
},
{
"epoch": 0.01,
"eval_reddit_askh_accuracy": 0.44699574235962536,
"eval_reddit_askh_loss": 2.666015625,
"eval_reddit_askh_runtime": 127.4754,
"eval_reddit_askh_samples_per_second": 154.579,
"eval_reddit_askh_steps_per_second": 3.224,
"step": 250
},
{
"epoch": 0.01,
"eval_reddit_asks_accuracy": 0.4559295270939454,
"eval_reddit_asks_loss": 2.515625,
"eval_reddit_asks_runtime": 148.9443,
"eval_reddit_asks_samples_per_second": 176.952,
"eval_reddit_asks_steps_per_second": 3.693,
"step": 250
},
{
"epoch": 0.01,
"learning_rate": 4.024955579951363e-06,
"loss": 1.6457,
"step": 260
},
{
"epoch": 0.01,
"learning_rate": 4.05227294026498e-06,
"loss": 1.6765,
"step": 270
},
{
"epoch": 0.01,
"learning_rate": 4.078596718903699e-06,
"loss": 1.7331,
"step": 280
},
{
"epoch": 0.01,
"learning_rate": 4.103996663164927e-06,
"loss": 1.7341,
"step": 290
},
{
"epoch": 0.01,
"learning_rate": 4.128535424532771e-06,
"loss": 1.6712,
"step": 300
},
{
"epoch": 0.02,
"learning_rate": 4.152269489723789e-06,
"loss": 1.7221,
"step": 310
},
{
"epoch": 0.02,
"learning_rate": 4.17524996386651e-06,
"loss": 1.6535,
"step": 320
},
{
"epoch": 0.02,
"learning_rate": 4.197523233129813e-06,
"loss": 1.7102,
"step": 330
},
{
"epoch": 0.02,
"learning_rate": 4.219131528403759e-06,
"loss": 1.7204,
"step": 340
},
{
"epoch": 0.02,
"learning_rate": 4.2401134072504595e-06,
"loss": 1.6645,
"step": 350
},
{
"epoch": 0.02,
"learning_rate": 4.26050416794548e-06,
"loss": 1.6375,
"step": 360
},
{
"epoch": 0.02,
"learning_rate": 4.280336206778326e-06,
"loss": 1.6983,
"step": 370
},
{
"epoch": 0.02,
"learning_rate": 4.299639327694684e-06,
"loss": 1.7058,
"step": 380
},
{
"epoch": 0.02,
"learning_rate": 4.318441011710832e-06,
"loss": 1.6323,
"step": 390
},
{
"epoch": 0.02,
"learning_rate": 4.336766652213271e-06,
"loss": 1.6884,
"step": 400
},
{
"epoch": 0.02,
"learning_rate": 4.35463976119956e-06,
"loss": 1.72,
"step": 410
},
{
"epoch": 0.02,
"learning_rate": 4.372082150663167e-06,
"loss": 1.6697,
"step": 420
},
{
"epoch": 0.02,
"learning_rate": 4.389114092632645e-06,
"loss": 1.6286,
"step": 430
},
{
"epoch": 0.02,
"learning_rate": 4.405754460810312e-06,
"loss": 1.7086,
"step": 440
},
{
"epoch": 0.02,
"learning_rate": 4.42202085629224e-06,
"loss": 1.6386,
"step": 450
},
{
"epoch": 0.02,
"learning_rate": 4.437929719469291e-06,
"loss": 1.6138,
"step": 460
},
{
"epoch": 0.02,
"learning_rate": 4.453496429892863e-06,
"loss": 1.6216,
"step": 470
},
{
"epoch": 0.02,
"learning_rate": 4.468735395625979e-06,
"loss": 1.7481,
"step": 480
},
{
"epoch": 0.02,
"learning_rate": 4.4836601333808566e-06,
"loss": 1.6641,
"step": 490
},
{
"epoch": 0.02,
"learning_rate": 4.498283340560032e-06,
"loss": 1.6732,
"step": 500
},
{
"epoch": 0.02,
"eval_gsm8k_hard_accuracy": 0.8998615691543503,
"eval_gsm8k_hard_loss": 0.46142578125,
"eval_gsm8k_hard_runtime": 2.1347,
"eval_gsm8k_hard_samples_per_second": 123.669,
"eval_gsm8k_hard_steps_per_second": 2.811,
"step": 500
},
{
"epoch": 0.02,
"eval_webgpt_accuracy": 0.4871765174992017,
"eval_webgpt_loss": 2.30078125,
"eval_webgpt_runtime": 17.9636,
"eval_webgpt_samples_per_second": 217.996,
"eval_webgpt_steps_per_second": 4.565,
"step": 500
},
{
"epoch": 0.02,
"eval_squad_v2_accuracy": 0.8868008286475288,
"eval_squad_v2_loss": 0.397216796875,
"eval_squad_v2_runtime": 88.1593,
"eval_squad_v2_samples_per_second": 295.647,
"eval_squad_v2_steps_per_second": 6.159,
"step": 500
},
{
"epoch": 0.02,
"eval_adversarial_qa_accuracy": 0.8145726993224083,
"eval_adversarial_qa_loss": 0.9189453125,
"eval_adversarial_qa_runtime": 21.7596,
"eval_adversarial_qa_samples_per_second": 275.74,
"eval_adversarial_qa_steps_per_second": 5.745,
"step": 500
},
{
"epoch": 0.02,
"eval_private_tuning_accuracy": 0.6609470207986216,
"eval_private_tuning_loss": 1.2919921875,
"eval_private_tuning_runtime": 64.7973,
"eval_private_tuning_samples_per_second": 326.834,
"eval_private_tuning_steps_per_second": 6.821,
"step": 500
},
{
"epoch": 0.02,
"eval_oa_translated_accuracy": 0.6798870038090434,
"eval_oa_translated_loss": 1.341796875,
"eval_oa_translated_runtime": 733.6761,
"eval_oa_translated_samples_per_second": 190.636,
"eval_oa_translated_steps_per_second": 3.972,
"step": 500
},
{
"epoch": 0.02,
"eval_prosocial_dialogue_accuracy": 0.5295571842042209,
"eval_prosocial_dialogue_loss": 1.8427734375,
"eval_prosocial_dialogue_runtime": 58.5686,
"eval_prosocial_dialogue_samples_per_second": 460.707,
"eval_prosocial_dialogue_steps_per_second": 9.613,
"step": 500
},
{
"epoch": 0.02,
"eval_math_qa_accuracy": 0.5540361105203919,
"eval_math_qa_loss": 1.9853515625,
"eval_math_qa_runtime": 19.0275,
"eval_math_qa_samples_per_second": 313.651,
"eval_math_qa_steps_per_second": 6.569,
"step": 500
},
{
"epoch": 0.02,
"eval_wikihow_accuracy": 0.6014838441270282,
"eval_wikihow_loss": 1.9541015625,
"eval_wikihow_runtime": 7.3976,
"eval_wikihow_samples_per_second": 309.967,
"eval_wikihow_steps_per_second": 6.489,
"step": 500
},
{
"epoch": 0.02,
"eval_joke_accuracy": 0.4797194844579227,
"eval_joke_loss": 2.34375,
"eval_joke_runtime": 0.5281,
"eval_joke_samples_per_second": 143.924,
"eval_joke_steps_per_second": 3.787,
"step": 500
},
{
"epoch": 0.02,
"eval_gsm8k_accuracy": 0.7496816445333818,
"eval_gsm8k_loss": 0.9697265625,
"eval_gsm8k_runtime": 6.3519,
"eval_gsm8k_samples_per_second": 235.363,
"eval_gsm8k_steps_per_second": 5.038,
"step": 500
},
{
"epoch": 0.02,
"eval_ted_trans_en-hi_accuracy": 0.6267826086956522,
"eval_ted_trans_en-hi_loss": 1.455078125,
"eval_ted_trans_en-hi_runtime": 1.042,
"eval_ted_trans_en-hi_samples_per_second": 98.852,
"eval_ted_trans_en-hi_steps_per_second": 2.879,
"step": 500
},
{
"epoch": 0.02,
"eval_ted_trans_de-ja_accuracy": 0.6095081533548741,
"eval_ted_trans_de-ja_loss": 1.736328125,
"eval_ted_trans_de-ja_runtime": 3.4864,
"eval_ted_trans_de-ja_samples_per_second": 205.945,
"eval_ted_trans_de-ja_steps_per_second": 4.302,
"step": 500
},
{
"epoch": 0.02,
"eval_ted_trans_nl-en_accuracy": 0.7156337065657362,
"eval_ted_trans_nl-en_loss": 1.2724609375,
"eval_ted_trans_nl-en_runtime": 4.8663,
"eval_ted_trans_nl-en_samples_per_second": 158.438,
"eval_ted_trans_nl-en_steps_per_second": 3.493,
"step": 500
},
{
"epoch": 0.02,
"eval_ted_trans_en-ja_accuracy": 0.624158725585033,
"eval_ted_trans_en-ja_loss": 1.63671875,
"eval_ted_trans_en-ja_runtime": 4.5137,
"eval_ted_trans_en-ja_samples_per_second": 177.461,
"eval_ted_trans_en-ja_steps_per_second": 3.766,
"step": 500
},
{
"epoch": 0.02,
"eval_ted_trans_en-es_accuracy": 0.7583100576231097,
"eval_ted_trans_en-es_loss": 1.048828125,
"eval_ted_trans_en-es_runtime": 3.4017,
"eval_ted_trans_en-es_samples_per_second": 242.822,
"eval_ted_trans_en-es_steps_per_second": 5.292,
"step": 500
},
{
"epoch": 0.02,
"eval_ted_trans_en-ms_accuracy": 0.7228320526893524,
"eval_ted_trans_en-ms_loss": 1.3466796875,
"eval_ted_trans_en-ms_runtime": 0.9049,
"eval_ted_trans_en-ms_samples_per_second": 46.416,
"eval_ted_trans_en-ms_steps_per_second": 1.105,
"step": 500
},
{
"epoch": 0.02,
"eval_xsum_accuracy": 0.6011633358116925,
"eval_xsum_loss": NaN,
"eval_xsum_runtime": 194.0576,
"eval_xsum_samples_per_second": 210.293,
"eval_xsum_steps_per_second": 4.385,
"step": 500
},
{
"epoch": 0.02,
"eval_cnn_dailymail_accuracy": 0.6701096765236707,
"eval_cnn_dailymail_loss": NaN,
"eval_cnn_dailymail_runtime": 276.9166,
"eval_cnn_dailymail_samples_per_second": 207.366,
"eval_cnn_dailymail_steps_per_second": 4.323,
"step": 500
},
{
"epoch": 0.02,
"eval_multi_news_accuracy": 0.5313963642137016,
"eval_multi_news_loss": NaN,
"eval_multi_news_runtime": 45.9972,
"eval_multi_news_samples_per_second": 195.555,
"eval_multi_news_steps_per_second": 4.087,
"step": 500
},
{
"epoch": 0.02,
"eval_tldr_news_accuracy": 0.5344681651462428,
"eval_tldr_news_loss": 2.201171875,
"eval_tldr_news_runtime": 3.1785,
"eval_tldr_news_samples_per_second": 449.262,
"eval_tldr_news_steps_per_second": 9.438,
"step": 500
},
{
"epoch": 0.02,
"eval_scitldr_accuracy": 0.49756888168557534,
"eval_scitldr_loss": NaN,
"eval_scitldr_runtime": 2.9905,
"eval_scitldr_samples_per_second": 133.421,
"eval_scitldr_steps_per_second": 3.009,
"step": 500
},
{
"epoch": 0.02,
"eval_samsum_accuracy": 0.6183671538076762,
"eval_samsum_loss": 1.4326171875,
"eval_samsum_runtime": 13.5218,
"eval_samsum_samples_per_second": 217.944,
"eval_samsum_steps_per_second": 4.585,
"step": 500
},
{
"epoch": 0.02,
"eval_debate_sum_accuracy": 0.9346053252084863,
"eval_debate_sum_loss": NaN,
"eval_debate_sum_runtime": 244.8422,
"eval_debate_sum_samples_per_second": 196.51,
"eval_debate_sum_steps_per_second": 4.097,
"step": 500
},
{
"epoch": 0.02,
"eval_billsum_accuracy": 0.6686931696172409,
"eval_billsum_loss": 1.427734375,
"eval_billsum_runtime": 27.101,
"eval_billsum_samples_per_second": 139.847,
"eval_billsum_steps_per_second": 2.915,
"step": 500
},
{
"epoch": 0.02,
"eval_wmt2019_zh-en_accuracy": 0.6249207026609304,
"eval_wmt2019_zh-en_loss": 1.7001953125,
"eval_wmt2019_zh-en_runtime": 12.5289,
"eval_wmt2019_zh-en_samples_per_second": 317.745,
"eval_wmt2019_zh-en_steps_per_second": 6.625,
"step": 500
},
{
"epoch": 0.02,
"eval_wmt2019_ru-en_accuracy": 0.7221410449334468,
"eval_wmt2019_ru-en_loss": 1.1083984375,
"eval_wmt2019_ru-en_runtime": 10.0702,
"eval_wmt2019_ru-en_samples_per_second": 297.91,
"eval_wmt2019_ru-en_steps_per_second": 6.256,
"step": 500
},
{
"epoch": 0.02,
"eval_wmt2019_de-en_accuracy": 0.7360578210047292,
"eval_wmt2019_de-en_loss": 1.072265625,
"eval_wmt2019_de-en_runtime": 7.6459,
"eval_wmt2019_de-en_samples_per_second": 392.106,
"eval_wmt2019_de-en_steps_per_second": 8.24,
"step": 500
},
{
"epoch": 0.02,
"eval_wmt2019_fr-de_accuracy": 0.7223688705319711,
"eval_wmt2019_fr-de_loss": 1.15234375,
"eval_wmt2019_fr-de_runtime": 5.1746,
"eval_wmt2019_fr-de_samples_per_second": 292.195,
"eval_wmt2019_fr-de_steps_per_second": 6.184,
"step": 500
},
{
"epoch": 0.02,
"eval_essay_instruction_accuracy": 0.5920659841231232,
"eval_essay_instruction_loss": 2.001953125,
"eval_essay_instruction_runtime": 4.5291,
"eval_essay_instruction_samples_per_second": 91.188,
"eval_essay_instruction_steps_per_second": 1.987,
"step": 500
},
{
"epoch": 0.02,
"eval_reddit_eli5_accuracy": 0.4464491832743919,
"eval_reddit_eli5_loss": 2.560546875,
"eval_reddit_eli5_runtime": 282.6054,
"eval_reddit_eli5_samples_per_second": 192.944,
"eval_reddit_eli5_steps_per_second": 4.02,
"step": 500
},
{
"epoch": 0.02,
"eval_reddit_askh_accuracy": 0.4485475495544,
"eval_reddit_askh_loss": 2.65625,
"eval_reddit_askh_runtime": 129.6151,
"eval_reddit_askh_samples_per_second": 152.027,
"eval_reddit_askh_steps_per_second": 3.171,
"step": 500
},
{
"epoch": 0.02,
"eval_reddit_asks_accuracy": 0.4581163067460401,
"eval_reddit_asks_loss": 2.5078125,
"eval_reddit_asks_runtime": 148.6855,
"eval_reddit_asks_samples_per_second": 177.26,
"eval_reddit_asks_steps_per_second": 3.699,
"step": 500
},
{
"epoch": 0.02,
"learning_rate": 4.512616960163228e-06,
"loss": 1.6485,
"step": 510
},
{
"epoch": 0.03,
"learning_rate": 4.526672239391333e-06,
"loss": 1.6097,
"step": 520
},
{
"epoch": 0.03,
"learning_rate": 4.540459782667983e-06,
"loss": 1.6802,
"step": 530
},
{
"epoch": 0.03,
"learning_rate": 4.553989599704948e-06,
"loss": 1.624,
"step": 540
},
{
"epoch": 0.03,
"learning_rate": 4.5672711491570735e-06,
"loss": 1.6027,
"step": 550
},
{
"epoch": 0.03,
"learning_rate": 4.5803133783436676e-06,
"loss": 1.6412,
"step": 560
},
{
"epoch": 0.03,
"learning_rate": 4.5931247594541535e-06,
"loss": 1.6312,
"step": 570
},
{
"epoch": 0.03,
"learning_rate": 4.605713322604896e-06,
"loss": 1.6101,
"step": 580
},
{
"epoch": 0.03,
"learning_rate": 4.61808668607024e-06,
"loss": 1.6172,
"step": 590
},
{
"epoch": 0.03,
"learning_rate": 4.63025208397274e-06,
"loss": 1.6527,
"step": 600
},
{
"epoch": 0.03,
"learning_rate": 4.642216391684613e-06,
"loss": 1.6598,
"step": 610
},
{
"epoch": 0.03,
"learning_rate": 4.653986149163757e-06,
"loss": 1.6289,
"step": 620
},
{
"epoch": 0.03,
"learning_rate": 4.6655675824226375e-06,
"loss": 1.6069,
"step": 630
},
{
"epoch": 0.03,
"learning_rate": 4.676966623306479e-06,
"loss": 1.5908,
"step": 640
},
{
"epoch": 0.03,
"learning_rate": 4.688188927738093e-06,
"loss": 1.6184,
"step": 650
},
{
"epoch": 0.03,
"learning_rate": 4.699239892569782e-06,
"loss": 1.6511,
"step": 660
},
{
"epoch": 0.03,
"learning_rate": 4.710124671168044e-06,
"loss": 1.6089,
"step": 670
},
{
"epoch": 0.03,
"learning_rate": 4.720848187843727e-06,
"loss": 1.74,
"step": 680
},
{
"epoch": 0.03,
"learning_rate": 4.73141515122876e-06,
"loss": 1.5827,
"step": 690
},
{
"epoch": 0.03,
"learning_rate": 4.741830066690428e-06,
"loss": 1.6384,
"step": 700
},
{
"epoch": 0.03,
"learning_rate": 4.752097247865126e-06,
"loss": 1.6331,
"step": 710
},
{
"epoch": 0.04,
"learning_rate": 4.7622208273854484e-06,
"loss": 1.6444,
"step": 720
},
{
"epoch": 0.04,
"learning_rate": 4.772204766867427e-06,
"loss": 1.597,
"step": 730
},
{
"epoch": 0.04,
"learning_rate": 4.782052866218294e-06,
"loss": 1.6041,
"step": 740
},
{
"epoch": 0.04,
"learning_rate": 4.7917687723195e-06,
"loss": 1.584,
"step": 750
},
{
"epoch": 0.04,
"eval_gsm8k_hard_accuracy": 0.9101217377142624,
"eval_gsm8k_hard_loss": 0.4091796875,
"eval_gsm8k_hard_runtime": 2.6619,
"eval_gsm8k_hard_samples_per_second": 99.177,
"eval_gsm8k_hard_steps_per_second": 2.254,
"step": 750
},
{
"epoch": 0.04,
"eval_webgpt_accuracy": 0.4874197343145836,
"eval_webgpt_loss": 2.296875,
"eval_webgpt_runtime": 16.2391,
"eval_webgpt_samples_per_second": 241.147,
"eval_webgpt_steps_per_second": 5.05,
"step": 750
},
{
"epoch": 0.04,
"eval_squad_v2_accuracy": 0.8846798855677223,
"eval_squad_v2_loss": 0.363037109375,
"eval_squad_v2_runtime": 89.6157,
"eval_squad_v2_samples_per_second": 290.842,
"eval_squad_v2_steps_per_second": 6.059,
"step": 750
},
{
"epoch": 0.04,
"eval_adversarial_qa_accuracy": 0.8026626070863556,
"eval_adversarial_qa_loss": 0.8447265625,
"eval_adversarial_qa_runtime": 21.271,
"eval_adversarial_qa_samples_per_second": 282.075,
"eval_adversarial_qa_steps_per_second": 5.877,
"step": 750
},
{
"epoch": 0.04,
"eval_private_tuning_accuracy": 0.6632682821768406,
"eval_private_tuning_loss": 1.2705078125,
"eval_private_tuning_runtime": 65.8398,
"eval_private_tuning_samples_per_second": 321.659,
"eval_private_tuning_steps_per_second": 6.713,
"step": 750
},
{
"epoch": 0.04,
"eval_oa_translated_accuracy": 0.6840566265427955,
"eval_oa_translated_loss": 1.31640625,
"eval_oa_translated_runtime": 739.9323,
"eval_oa_translated_samples_per_second": 189.024,
"eval_oa_translated_steps_per_second": 3.938,
"step": 750
},
{
"epoch": 0.04,
"eval_prosocial_dialogue_accuracy": 0.5235860358801359,
"eval_prosocial_dialogue_loss": 1.8115234375,
"eval_prosocial_dialogue_runtime": 61.6452,
"eval_prosocial_dialogue_samples_per_second": 437.715,
"eval_prosocial_dialogue_steps_per_second": 9.133,
"step": 750
},
{
"epoch": 0.04,
"eval_math_qa_accuracy": 0.5631207954480619,
"eval_math_qa_loss": 1.921875,
"eval_math_qa_runtime": 17.923,
"eval_math_qa_samples_per_second": 332.98,
"eval_math_qa_steps_per_second": 6.974,
"step": 750
},
{
"epoch": 0.04,
"eval_wikihow_accuracy": 0.6049785050617112,
"eval_wikihow_loss": 1.923828125,
"eval_wikihow_runtime": 7.5032,
"eval_wikihow_samples_per_second": 305.602,
"eval_wikihow_steps_per_second": 6.397,
"step": 750
},
{
"epoch": 0.04,
"eval_joke_accuracy": 0.4799090219863533,
"eval_joke_loss": 2.30078125,
"eval_joke_runtime": 1.3898,
"eval_joke_samples_per_second": 54.685,
"eval_joke_steps_per_second": 1.439,
"step": 750
},
{
"epoch": 0.04,
"eval_gsm8k_accuracy": 0.7521515232084633,
"eval_gsm8k_loss": 0.94091796875,
"eval_gsm8k_runtime": 5.0267,
"eval_gsm8k_samples_per_second": 297.41,
"eval_gsm8k_steps_per_second": 6.366,
"step": 750
},
{
"epoch": 0.04,
"eval_ted_trans_en-hi_accuracy": 0.6381951731374607,
"eval_ted_trans_en-hi_loss": 1.3837890625,
"eval_ted_trans_en-hi_runtime": 3.1926,
"eval_ted_trans_en-hi_samples_per_second": 32.262,
"eval_ted_trans_en-hi_steps_per_second": 0.94,
"step": 750
},
{
"epoch": 0.04,
"eval_ted_trans_de-ja_accuracy": 0.6046756766931446,
"eval_ted_trans_de-ja_loss": 1.75390625,
"eval_ted_trans_de-ja_runtime": 4.4495,
"eval_ted_trans_de-ja_samples_per_second": 161.365,
"eval_ted_trans_de-ja_steps_per_second": 3.371,
"step": 750
},
{
"epoch": 0.04,
"eval_ted_trans_nl-en_accuracy": 0.7248159831756046,
"eval_ted_trans_nl-en_loss": 1.2216796875,
"eval_ted_trans_nl-en_runtime": 3.3794,
"eval_ted_trans_nl-en_samples_per_second": 228.144,
"eval_ted_trans_nl-en_steps_per_second": 5.03,
"step": 750
},
{
"epoch": 0.04,
"eval_ted_trans_en-ja_accuracy": 0.62236684020825,
"eval_ted_trans_en-ja_loss": 1.6015625,
"eval_ted_trans_en-ja_runtime": 4.3536,
"eval_ted_trans_en-ja_samples_per_second": 183.987,
"eval_ted_trans_en-ja_steps_per_second": 3.905,
"step": 750
},
{
"epoch": 0.04,
"eval_ted_trans_en-es_accuracy": 0.7690527730088826,
"eval_ted_trans_en-es_loss": 0.99658203125,
"eval_ted_trans_en-es_runtime": 4.0371,
"eval_ted_trans_en-es_samples_per_second": 204.604,
"eval_ted_trans_en-es_steps_per_second": 4.459,
"step": 750
},
{
"epoch": 0.04,
"eval_ted_trans_en-ms_accuracy": 0.6281984334203655,
"eval_ted_trans_en-ms_loss": 1.7548828125,
"eval_ted_trans_en-ms_runtime": 1.3143,
"eval_ted_trans_en-ms_samples_per_second": 31.955,
"eval_ted_trans_en-ms_steps_per_second": 0.761,
"step": 750
},
{
"epoch": 0.04,
"eval_xsum_accuracy": 0.6022120068856478,
"eval_xsum_loss": NaN,
"eval_xsum_runtime": 190.7745,
"eval_xsum_samples_per_second": 213.912,
"eval_xsum_steps_per_second": 4.461,
"step": 750
},
{
"epoch": 0.04,
"eval_cnn_dailymail_accuracy": 0.6730714054329214,
"eval_cnn_dailymail_loss": NaN,
"eval_cnn_dailymail_runtime": 278.1455,
"eval_cnn_dailymail_samples_per_second": 206.45,
"eval_cnn_dailymail_steps_per_second": 4.304,
"step": 750
},
{
"epoch": 0.04,
"eval_multi_news_accuracy": 0.5342626698844151,
"eval_multi_news_loss": NaN,
"eval_multi_news_runtime": 44.9541,
"eval_multi_news_samples_per_second": 200.093,
"eval_multi_news_steps_per_second": 4.182,
"step": 750
},
{
"epoch": 0.04,
"eval_tldr_news_accuracy": 0.5577447568889307,
"eval_tldr_news_loss": 2.03125,
"eval_tldr_news_runtime": 4.8998,
"eval_tldr_news_samples_per_second": 291.441,
"eval_tldr_news_steps_per_second": 6.123,
"step": 750
},
{
"epoch": 0.04,
"eval_scitldr_accuracy": 0.5008103727714749,
"eval_scitldr_loss": NaN,
"eval_scitldr_runtime": 2.5302,
"eval_scitldr_samples_per_second": 157.698,
"eval_scitldr_steps_per_second": 3.557,
"step": 750
},
{
"epoch": 0.04,
"eval_samsum_accuracy": 0.6208006056591274,
"eval_samsum_loss": 1.40625,
"eval_samsum_runtime": 14.6527,
"eval_samsum_samples_per_second": 201.123,
"eval_samsum_steps_per_second": 4.231,
"step": 750
},
{
"epoch": 0.04,
"eval_debate_sum_accuracy": 0.9359010655534944,
"eval_debate_sum_loss": NaN,
"eval_debate_sum_runtime": 244.5165,
"eval_debate_sum_samples_per_second": 196.772,
"eval_debate_sum_steps_per_second": 4.102,
"step": 750
},
{
"epoch": 0.04,
"eval_billsum_accuracy": 0.6674552130307286,
"eval_billsum_loss": 1.412109375,
"eval_billsum_runtime": 27.8571,
"eval_billsum_samples_per_second": 136.052,
"eval_billsum_steps_per_second": 2.836,
"step": 750
},
{
"epoch": 0.04,
"eval_wmt2019_zh-en_accuracy": 0.6281871623861053,
"eval_wmt2019_zh-en_loss": 1.689453125,
"eval_wmt2019_zh-en_runtime": 11.4111,
"eval_wmt2019_zh-en_samples_per_second": 348.872,
"eval_wmt2019_zh-en_steps_per_second": 7.274,
"step": 750
},
{
"epoch": 0.04,
"eval_wmt2019_ru-en_accuracy": 0.7208443499252788,
"eval_wmt2019_ru-en_loss": 1.1123046875,
"eval_wmt2019_ru-en_runtime": 10.8964,
"eval_wmt2019_ru-en_samples_per_second": 275.321,
"eval_wmt2019_ru-en_steps_per_second": 5.782,
"step": 750
},
{
"epoch": 0.04,
"eval_wmt2019_de-en_accuracy": 0.7330685618729097,
"eval_wmt2019_de-en_loss": 1.0859375,
"eval_wmt2019_de-en_runtime": 7.6477,
"eval_wmt2019_de-en_samples_per_second": 392.011,
"eval_wmt2019_de-en_steps_per_second": 8.238,
"step": 750
},
{
"epoch": 0.04,
"eval_wmt2019_fr-de_accuracy": 0.7199981458877335,
"eval_wmt2019_fr-de_loss": 1.1474609375,
"eval_wmt2019_fr-de_runtime": 5.4342,
"eval_wmt2019_fr-de_samples_per_second": 278.24,
"eval_wmt2019_fr-de_steps_per_second": 5.889,
"step": 750
},
{
"epoch": 0.04,
"eval_essay_instruction_accuracy": 0.5924197863918802,
"eval_essay_instruction_loss": 1.9921875,
"eval_essay_instruction_runtime": 4.79,
"eval_essay_instruction_samples_per_second": 86.221,
"eval_essay_instruction_steps_per_second": 1.879,
"step": 750
},
{
"epoch": 0.04,
"eval_reddit_eli5_accuracy": 0.4455537602670511,
"eval_reddit_eli5_loss": 2.5546875,
"eval_reddit_eli5_runtime": 268.3918,
"eval_reddit_eli5_samples_per_second": 203.162,
"eval_reddit_eli5_steps_per_second": 4.233,
"step": 750
},
{
"epoch": 0.04,
"eval_reddit_askh_accuracy": 0.44842410632057694,
"eval_reddit_askh_loss": 2.65234375,
"eval_reddit_askh_runtime": 150.7797,
"eval_reddit_askh_samples_per_second": 130.687,
"eval_reddit_askh_steps_per_second": 2.726,
"step": 750
},
{
"epoch": 0.04,
"eval_reddit_asks_accuracy": 0.4572903640086479,
"eval_reddit_asks_loss": 2.501953125,
"eval_reddit_asks_runtime": 135.6226,
"eval_reddit_asks_samples_per_second": 194.333,
"eval_reddit_asks_steps_per_second": 4.055,
"step": 750
},
{
"epoch": 0.04,
"learning_rate": 4.801355987134653e-06,
"loss": 1.5707,
"step": 760
},
{
"epoch": 0.04,
"learning_rate": 4.81081787528747e-06,
"loss": 1.6039,
"step": 770
},
{
"epoch": 0.04,
"learning_rate": 4.820157671150801e-06,
"loss": 1.5763,
"step": 780
},
{
"epoch": 0.04,
"learning_rate": 4.82937848548407e-06,
"loss": 1.6415,
"step": 790
},
{
"epoch": 0.04,
"learning_rate": 4.83848331165324e-06,
"loss": 1.6192,
"step": 800
},
{
"epoch": 0.04,
"learning_rate": 4.847475031464417e-06,
"loss": 1.8104,
"step": 810
},
{
"epoch": 0.04,
"learning_rate": 4.856356420639528e-06,
"loss": 1.6151,
"step": 820
},
{
"epoch": 0.04,
"learning_rate": 4.8651301539601235e-06,
"loss": 1.6213,
"step": 830
},
{
"epoch": 0.04,
"learning_rate": 4.873798810103137e-06,
"loss": 1.5999,
"step": 840
},
{
"epoch": 0.04,
"learning_rate": 4.882364876190489e-06,
"loss": 1.5919,
"step": 850
},
{
"epoch": 0.04,
"learning_rate": 4.890830752072613e-06,
"loss": 1.6093,
"step": 860
},
{
"epoch": 0.04,
"learning_rate": 4.899198754364365e-06,
"loss": 1.6407,
"step": 870
},
{
"epoch": 0.04,
"learning_rate": 4.907471120250281e-06,
"loss": 1.6171,
"step": 880
},
{
"epoch": 0.04,
"learning_rate": 4.915650011074855e-06,
"loss": 1.6894,
"step": 890
},
{
"epoch": 0.04,
"learning_rate": 4.923737515732209e-06,
"loss": 1.6495,
"step": 900
},
{
"epoch": 0.04,
"learning_rate": 4.931735653868489e-06,
"loss": 1.6688,
"step": 910
},
{
"epoch": 0.04,
"learning_rate": 4.93964637890926e-06,
"loss": 1.6085,
"step": 920
},
{
"epoch": 0.05,
"learning_rate": 4.9474715809232256e-06,
"loss": 1.6499,
"step": 930
},
{
"epoch": 0.05,
"learning_rate": 4.955213089332832e-06,
"loss": 1.6319,
"step": 940
},
{
"epoch": 0.05,
"learning_rate": 4.962872675481414e-06,
"loss": 1.5965,
"step": 950
},
{
"epoch": 0.05,
"learning_rate": 4.970452055065948e-06,
"loss": 1.5977,
"step": 960
},
{
"epoch": 0.05,
"learning_rate": 4.977952890443742e-06,
"loss": 1.6161,
"step": 970
},
{
"epoch": 0.05,
"learning_rate": 4.985376792820825e-06,
"loss": 1.5886,
"step": 980
},
{
"epoch": 0.05,
"learning_rate": 4.992725324329251e-06,
"loss": 1.5945,
"step": 990
},
{
"epoch": 0.05,
"learning_rate": 5e-06,
"loss": 1.6211,
"step": 1000
},
{
"epoch": 0.05,
"eval_gsm8k_hard_accuracy": 0.9133382191278857,
"eval_gsm8k_hard_loss": 0.38720703125,
"eval_gsm8k_hard_runtime": 2.5023,
"eval_gsm8k_hard_samples_per_second": 105.503,
"eval_gsm8k_hard_steps_per_second": 2.398,
"step": 1000
},
{
"epoch": 0.05,
"eval_webgpt_accuracy": 0.4873552074043802,
"eval_webgpt_loss": 2.29296875,
"eval_webgpt_runtime": 18.3874,
"eval_webgpt_samples_per_second": 212.972,
"eval_webgpt_steps_per_second": 4.46,
"step": 1000
},
{
"epoch": 0.05,
"eval_squad_v2_accuracy": 0.8973280344987951,
"eval_squad_v2_loss": 0.33642578125,
"eval_squad_v2_runtime": 87.2934,
"eval_squad_v2_samples_per_second": 298.579,
"eval_squad_v2_steps_per_second": 6.22,
"step": 1000
},
{
"epoch": 0.05,
"eval_adversarial_qa_accuracy": 0.810095221038178,
"eval_adversarial_qa_loss": 0.85498046875,
"eval_adversarial_qa_runtime": 21.5732,
"eval_adversarial_qa_samples_per_second": 278.123,
"eval_adversarial_qa_steps_per_second": 5.794,
"step": 1000
},
{
"epoch": 0.05,
"eval_private_tuning_accuracy": 0.6643376777215743,
"eval_private_tuning_loss": 1.2626953125,
"eval_private_tuning_runtime": 61.2475,
"eval_private_tuning_samples_per_second": 345.777,
"eval_private_tuning_steps_per_second": 7.217,
"step": 1000
},
{
"epoch": 0.05,
"eval_oa_translated_accuracy": 0.6876637305407464,
"eval_oa_translated_loss": 1.298828125,
"eval_oa_translated_runtime": 714.9582,
"eval_oa_translated_samples_per_second": 195.627,
"eval_oa_translated_steps_per_second": 4.076,
"step": 1000
},
{
"epoch": 0.05,
"eval_prosocial_dialogue_accuracy": 0.533683742122458,
"eval_prosocial_dialogue_loss": 1.8115234375,
"eval_prosocial_dialogue_runtime": 77.2682,
"eval_prosocial_dialogue_samples_per_second": 349.212,
"eval_prosocial_dialogue_steps_per_second": 7.286,
"step": 1000
},
{
"epoch": 0.05,
"eval_math_qa_accuracy": 0.5697754633111511,
"eval_math_qa_loss": 1.884765625,
"eval_math_qa_runtime": 19.141,
"eval_math_qa_samples_per_second": 311.791,
"eval_math_qa_steps_per_second": 6.53,
"step": 1000
},
{
"epoch": 0.05,
"eval_wikihow_accuracy": 0.6076965746775759,
"eval_wikihow_loss": 1.91015625,
"eval_wikihow_runtime": 7.4493,
"eval_wikihow_samples_per_second": 307.815,
"eval_wikihow_steps_per_second": 6.444,
"step": 1000
},
{
"epoch": 0.05,
"eval_joke_accuracy": 0.4844579226686884,
"eval_joke_loss": 2.287109375,
"eval_joke_runtime": 0.5532,
"eval_joke_samples_per_second": 137.392,
"eval_joke_steps_per_second": 3.616,
"step": 1000
},
{
"epoch": 0.05,
"eval_gsm8k_accuracy": 0.7594911909992863,
"eval_gsm8k_loss": 0.9140625,
"eval_gsm8k_runtime": 5.3345,
"eval_gsm8k_samples_per_second": 280.251,
"eval_gsm8k_steps_per_second": 5.999,
"step": 1000
},
{
"epoch": 0.05,
"eval_ted_trans_en-hi_accuracy": 0.6449365772509877,
"eval_ted_trans_en-hi_loss": 1.306640625,
"eval_ted_trans_en-hi_runtime": 1.7092,
"eval_ted_trans_en-hi_samples_per_second": 60.262,
"eval_ted_trans_en-hi_steps_per_second": 1.755,
"step": 1000
},
{
"epoch": 0.05,
"eval_ted_trans_de-ja_accuracy": 0.6195650127106676,
"eval_ted_trans_de-ja_loss": 1.6767578125,
"eval_ted_trans_de-ja_runtime": 4.3842,
"eval_ted_trans_de-ja_samples_per_second": 163.768,
"eval_ted_trans_de-ja_steps_per_second": 3.421,
"step": 1000
},
{
"epoch": 0.05,
"eval_ted_trans_nl-en_accuracy": 0.7198614136853986,
"eval_ted_trans_nl-en_loss": 1.236328125,
"eval_ted_trans_nl-en_runtime": 3.7037,
"eval_ted_trans_nl-en_samples_per_second": 208.171,
"eval_ted_trans_nl-en_steps_per_second": 4.59,
"step": 1000
},
{
"epoch": 0.05,
"eval_ted_trans_en-ja_accuracy": 0.6259939079868666,
"eval_ted_trans_en-ja_loss": 1.5947265625,
"eval_ted_trans_en-ja_runtime": 4.2274,
"eval_ted_trans_en-ja_samples_per_second": 189.48,
"eval_ted_trans_en-ja_steps_per_second": 4.021,
"step": 1000
},
{
"epoch": 0.05,
"eval_ted_trans_en-es_accuracy": 0.7666360545061928,
"eval_ted_trans_en-es_loss": 1.01171875,
"eval_ted_trans_en-es_runtime": 4.7495,
"eval_ted_trans_en-es_samples_per_second": 173.914,
"eval_ted_trans_en-es_steps_per_second": 3.79,
"step": 1000
},
{
"epoch": 0.05,
"eval_ted_trans_en-ms_accuracy": 0.6276762402088772,
"eval_ted_trans_en-ms_loss": 1.7060546875,
"eval_ted_trans_en-ms_runtime": 0.3249,
"eval_ted_trans_en-ms_samples_per_second": 129.281,
"eval_ted_trans_en-ms_steps_per_second": 3.078,
"step": 1000
},
{
"epoch": 0.05,
"eval_xsum_accuracy": 0.6038378677782439,
"eval_xsum_loss": NaN,
"eval_xsum_runtime": 194.7148,
"eval_xsum_samples_per_second": 209.583,
"eval_xsum_steps_per_second": 4.37,
"step": 1000
},
{
"epoch": 0.05,
"eval_cnn_dailymail_accuracy": 0.6724346337174325,
"eval_cnn_dailymail_loss": NaN,
"eval_cnn_dailymail_runtime": 276.6313,
"eval_cnn_dailymail_samples_per_second": 207.58,
"eval_cnn_dailymail_steps_per_second": 4.327,
"step": 1000
},
{
"epoch": 0.05,
"eval_multi_news_accuracy": 0.5417725136542614,
"eval_multi_news_loss": NaN,
"eval_multi_news_runtime": 44.7388,
"eval_multi_news_samples_per_second": 201.056,
"eval_multi_news_steps_per_second": 4.202,
"step": 1000
},
{
"epoch": 0.05,
"eval_tldr_news_accuracy": 0.5576977334712687,
"eval_tldr_news_loss": 2.015625,
"eval_tldr_news_runtime": 4.3825,
"eval_tldr_news_samples_per_second": 325.843,
"eval_tldr_news_steps_per_second": 6.845,
"step": 1000
},
{
"epoch": 0.05,
"eval_scitldr_accuracy": 0.5008103727714749,
"eval_scitldr_loss": NaN,
"eval_scitldr_runtime": 2.9018,
"eval_scitldr_samples_per_second": 137.5,
"eval_scitldr_steps_per_second": 3.102,
"step": 1000
},
{
"epoch": 0.05,
"eval_samsum_accuracy": 0.6229095972637186,
"eval_samsum_loss": 1.4013671875,
"eval_samsum_runtime": 14.884,
"eval_samsum_samples_per_second": 197.998,
"eval_samsum_steps_per_second": 4.166,
"step": 1000
},
{
"epoch": 0.05,
"eval_debate_sum_accuracy": 0.9370327058673479,
"eval_debate_sum_loss": NaN,
"eval_debate_sum_runtime": 243.4339,
"eval_debate_sum_samples_per_second": 197.647,
"eval_debate_sum_steps_per_second": 4.12,
"step": 1000
},
{
"epoch": 0.05,
"eval_billsum_accuracy": 0.6730090224558118,
"eval_billsum_loss": 1.3984375,
"eval_billsum_runtime": 26.3919,
"eval_billsum_samples_per_second": 143.605,
"eval_billsum_steps_per_second": 2.993,
"step": 1000
},
{
"epoch": 0.05,
"eval_wmt2019_zh-en_accuracy": 0.6340022173592529,
"eval_wmt2019_zh-en_loss": 1.66015625,
"eval_wmt2019_zh-en_runtime": 12.9276,
"eval_wmt2019_zh-en_samples_per_second": 307.945,
"eval_wmt2019_zh-en_steps_per_second": 6.42,
"step": 1000
},
{
"epoch": 0.05,
"eval_wmt2019_ru-en_accuracy": 0.7246060048314736,
"eval_wmt2019_ru-en_loss": 1.09375,
"eval_wmt2019_ru-en_runtime": 10.2691,
"eval_wmt2019_ru-en_samples_per_second": 292.138,
"eval_wmt2019_ru-en_steps_per_second": 6.135,
"step": 1000
},
{
"epoch": 0.05,
"eval_wmt2019_de-en_accuracy": 0.7373287943940118,
"eval_wmt2019_de-en_loss": 1.076171875,
"eval_wmt2019_de-en_runtime": 8.2087,
"eval_wmt2019_de-en_samples_per_second": 365.221,
"eval_wmt2019_de-en_steps_per_second": 7.675,
"step": 1000
},
{
"epoch": 0.05,
"eval_wmt2019_fr-de_accuracy": 0.7239072325829329,
"eval_wmt2019_fr-de_loss": 1.1376953125,
"eval_wmt2019_fr-de_runtime": 5.7413,
"eval_wmt2019_fr-de_samples_per_second": 263.356,
"eval_wmt2019_fr-de_steps_per_second": 5.574,
"step": 1000
},
{
"epoch": 0.05,
"eval_essay_instruction_accuracy": 0.5944486212767839,
"eval_essay_instruction_loss": 1.982421875,
"eval_essay_instruction_runtime": 4.9211,
"eval_essay_instruction_samples_per_second": 83.925,
"eval_essay_instruction_steps_per_second": 1.829,
"step": 1000
},
{
"epoch": 0.05,
"eval_reddit_eli5_accuracy": 0.44636294670867693,
"eval_reddit_eli5_loss": 2.552734375,
"eval_reddit_eli5_runtime": 266.8136,
"eval_reddit_eli5_samples_per_second": 204.364,
"eval_reddit_eli5_steps_per_second": 4.258,
"step": 1000
},
{
"epoch": 0.05,
"eval_reddit_askh_accuracy": 0.4487002153615098,
"eval_reddit_askh_loss": 2.65234375,
"eval_reddit_askh_runtime": 136.1822,
"eval_reddit_askh_samples_per_second": 144.696,
"eval_reddit_askh_steps_per_second": 3.018,
"step": 1000
},
{
"epoch": 0.05,
"eval_reddit_asks_accuracy": 0.45779545724968684,
"eval_reddit_asks_loss": 2.501953125,
"eval_reddit_asks_runtime": 151.9509,
"eval_reddit_asks_samples_per_second": 173.451,
"eval_reddit_asks_steps_per_second": 3.62,
"step": 1000
},
{
"epoch": 0.06,
"learning_rate": 4.998642779587406e-06,
"loss": 1.5937,
"step": 1010
},
{
"epoch": 0.06,
"learning_rate": 4.997134756906744e-06,
"loss": 1.6393,
"step": 1020
},
{
"epoch": 0.06,
"learning_rate": 4.995626734226083e-06,
"loss": 1.64,
"step": 1030
},
{
"epoch": 0.06,
"learning_rate": 4.994118711545422e-06,
"loss": 1.6529,
"step": 1040
},
{
"epoch": 0.06,
"learning_rate": 4.992610688864761e-06,
"loss": 1.6339,
"step": 1050
},
{
"epoch": 0.06,
"learning_rate": 4.9911026661841e-06,
"loss": 1.7531,
"step": 1060
},
{
"epoch": 0.06,
"learning_rate": 4.989594643503439e-06,
"loss": 1.622,
"step": 1070
},
{
"epoch": 0.06,
"learning_rate": 4.988086620822777e-06,
"loss": 1.7041,
"step": 1080
},
{
"epoch": 0.06,
"learning_rate": 4.986578598142116e-06,
"loss": 1.5922,
"step": 1090
},
{
"epoch": 0.06,
"learning_rate": 4.9850705754614555e-06,
"loss": 1.5937,
"step": 1100
},
{
"epoch": 0.06,
"learning_rate": 4.983562552780794e-06,
"loss": 1.6301,
"step": 1110
},
{
"epoch": 0.07,
"learning_rate": 4.982054530100133e-06,
"loss": 1.6054,
"step": 1120
},
{
"epoch": 0.07,
"learning_rate": 4.980546507419472e-06,
"loss": 1.745,
"step": 1130
},
{
"epoch": 0.07,
"learning_rate": 4.9790384847388105e-06,
"loss": 1.6002,
"step": 1140
},
{
"epoch": 0.07,
"learning_rate": 4.97753046205815e-06,
"loss": 1.623,
"step": 1150
},
{
"epoch": 0.07,
"learning_rate": 4.976022439377489e-06,
"loss": 1.6432,
"step": 1160
},
{
"epoch": 0.07,
"learning_rate": 4.974514416696827e-06,
"loss": 1.5977,
"step": 1170
},
{
"epoch": 0.07,
"learning_rate": 4.973006394016166e-06,
"loss": 1.5784,
"step": 1180
},
{
"epoch": 0.07,
"learning_rate": 4.971498371335505e-06,
"loss": 1.7948,
"step": 1190
},
{
"epoch": 0.07,
"learning_rate": 4.969990348654844e-06,
"loss": 1.5515,
"step": 1200
},
{
"epoch": 0.07,
"learning_rate": 4.968482325974183e-06,
"loss": 1.6368,
"step": 1210
},
{
"epoch": 0.07,
"learning_rate": 4.966974303293522e-06,
"loss": 1.6672,
"step": 1220
},
{
"epoch": 0.07,
"learning_rate": 4.96546628061286e-06,
"loss": 1.6363,
"step": 1230
},
{
"epoch": 0.07,
"learning_rate": 4.9639582579321995e-06,
"loss": 1.6082,
"step": 1240
},
{
"epoch": 0.07,
"learning_rate": 4.962450235251539e-06,
"loss": 1.5335,
"step": 1250
},
{
"epoch": 0.07,
"eval_gsm8k_hard_accuracy": 0.9144782378567647,
"eval_gsm8k_hard_loss": 0.38037109375,
"eval_gsm8k_hard_runtime": 2.4991,
"eval_gsm8k_hard_samples_per_second": 105.638,
"eval_gsm8k_hard_steps_per_second": 2.401,
"step": 1250
},
{
"epoch": 0.07,
"eval_webgpt_accuracy": 0.48714508131217954,
"eval_webgpt_loss": 2.287109375,
"eval_webgpt_runtime": 17.8711,
"eval_webgpt_samples_per_second": 219.124,
"eval_webgpt_steps_per_second": 4.588,
"step": 1250
},
{
"epoch": 0.07,
"eval_squad_v2_accuracy": 0.8998647106075339,
"eval_squad_v2_loss": 0.327392578125,
"eval_squad_v2_runtime": 89.3681,
"eval_squad_v2_samples_per_second": 291.648,
"eval_squad_v2_steps_per_second": 6.076,
"step": 1250
},
{
"epoch": 0.07,
"eval_adversarial_qa_accuracy": 0.8150204471508313,
"eval_adversarial_qa_loss": 0.82080078125,
"eval_adversarial_qa_runtime": 21.2941,
"eval_adversarial_qa_samples_per_second": 281.769,
"eval_adversarial_qa_steps_per_second": 5.87,
"step": 1250
},
{
"epoch": 0.07,
"eval_private_tuning_accuracy": 0.6658206227322084,
"eval_private_tuning_loss": 1.251953125,
"eval_private_tuning_runtime": 65.5451,
"eval_private_tuning_samples_per_second": 323.106,
"eval_private_tuning_steps_per_second": 6.743,
"step": 1250
},
{
"epoch": 0.07,
"eval_oa_translated_accuracy": 0.692265258615827,
"eval_oa_translated_loss": 1.2744140625,
"eval_oa_translated_runtime": 743.3603,
"eval_oa_translated_samples_per_second": 188.499,
"eval_oa_translated_steps_per_second": 3.928,
"step": 1250
},
{
"epoch": 0.07,
"eval_prosocial_dialogue_accuracy": 0.5311811844172045,
"eval_prosocial_dialogue_loss": 1.7724609375,
"eval_prosocial_dialogue_runtime": 51.2881,
"eval_prosocial_dialogue_samples_per_second": 526.106,
"eval_prosocial_dialogue_steps_per_second": 10.977,
"step": 1250
},
{
"epoch": 0.07,
"eval_math_qa_accuracy": 0.5769073170225678,
"eval_math_qa_loss": 1.8466796875,
"eval_math_qa_runtime": 19.0785,
"eval_math_qa_samples_per_second": 312.813,
"eval_math_qa_steps_per_second": 6.552,
"step": 1250
},
{
"epoch": 0.07,
"eval_wikihow_accuracy": 0.6035709332963528,
"eval_wikihow_loss": 1.896484375,
"eval_wikihow_runtime": 7.4264,
"eval_wikihow_samples_per_second": 308.762,
"eval_wikihow_steps_per_second": 6.463,
"step": 1250
},
{
"epoch": 0.07,
"eval_joke_accuracy": 0.48474222896133434,
"eval_joke_loss": 2.275390625,
"eval_joke_runtime": 0.6166,
"eval_joke_samples_per_second": 123.256,
"eval_joke_steps_per_second": 3.244,
"step": 1250
},
{
"epoch": 0.07,
"eval_gsm8k_accuracy": 0.7641650690586473,
"eval_gsm8k_loss": 0.8916015625,
"eval_gsm8k_runtime": 5.7365,
"eval_gsm8k_samples_per_second": 260.61,
"eval_gsm8k_steps_per_second": 5.578,
"step": 1250
},
{
"epoch": 0.07,
"eval_ted_trans_en-hi_accuracy": 0.6325462370594394,
"eval_ted_trans_en-hi_loss": 1.4697265625,
"eval_ted_trans_en-hi_runtime": 1.283,
"eval_ted_trans_en-hi_samples_per_second": 80.282,
"eval_ted_trans_en-hi_steps_per_second": 2.338,
"step": 1250
},
{
"epoch": 0.07,
"eval_ted_trans_de-ja_accuracy": 0.6211990075587098,
"eval_ted_trans_de-ja_loss": 1.662109375,
"eval_ted_trans_de-ja_runtime": 3.8274,
"eval_ted_trans_de-ja_samples_per_second": 187.596,
"eval_ted_trans_de-ja_steps_per_second": 3.919,
"step": 1250
},
{
"epoch": 0.07,
"eval_ted_trans_nl-en_accuracy": 0.7194199077125907,
"eval_ted_trans_nl-en_loss": 1.2353515625,
"eval_ted_trans_nl-en_runtime": 4.0007,
"eval_ted_trans_nl-en_samples_per_second": 192.717,
"eval_ted_trans_nl-en_steps_per_second": 4.249,
"step": 1250
},
{
"epoch": 0.07,
"eval_ted_trans_en-ja_accuracy": 0.6241126045950204,
"eval_ted_trans_en-ja_loss": 1.603515625,
"eval_ted_trans_en-ja_runtime": 4.7838,
"eval_ted_trans_en-ja_samples_per_second": 167.442,
"eval_ted_trans_en-ja_steps_per_second": 3.554,
"step": 1250
},
{
"epoch": 0.07,
"eval_ted_trans_en-es_accuracy": 0.7645402663284718,
"eval_ted_trans_en-es_loss": 1.001953125,
"eval_ted_trans_en-es_runtime": 4.101,
"eval_ted_trans_en-es_samples_per_second": 201.413,
"eval_ted_trans_en-es_steps_per_second": 4.389,
"step": 1250
},
{
"epoch": 0.07,
"eval_ted_trans_en-ms_accuracy": 0.6673585884795018,
"eval_ted_trans_en-ms_loss": 1.525390625,
"eval_ted_trans_en-ms_runtime": 1.2996,
"eval_ted_trans_en-ms_samples_per_second": 32.318,
"eval_ted_trans_en-ms_steps_per_second": 0.769,
"step": 1250
},
{
"epoch": 0.07,
"eval_xsum_accuracy": 0.6053592197062103,
"eval_xsum_loss": NaN,
"eval_xsum_runtime": 191.7698,
"eval_xsum_samples_per_second": 212.802,
"eval_xsum_steps_per_second": 4.438,
"step": 1250
},
{
"epoch": 0.07,
"eval_cnn_dailymail_accuracy": 0.6746041001434587,
"eval_cnn_dailymail_loss": NaN,
"eval_cnn_dailymail_runtime": 278.9634,
"eval_cnn_dailymail_samples_per_second": 205.844,
"eval_cnn_dailymail_steps_per_second": 4.291,
"step": 1250
},
{
"epoch": 0.07,
"eval_multi_news_accuracy": 0.5434468524251806,
"eval_multi_news_loss": NaN,
"eval_multi_news_runtime": 45.3002,
"eval_multi_news_samples_per_second": 198.564,
"eval_multi_news_steps_per_second": 4.15,
"step": 1250
},
{
"epoch": 0.07,
"eval_tldr_news_accuracy": 0.5667732530800339,
"eval_tldr_news_loss": 1.9794921875,
"eval_tldr_news_runtime": 3.9274,
"eval_tldr_news_samples_per_second": 363.601,
"eval_tldr_news_steps_per_second": 7.639,
"step": 1250
},
{
"epoch": 0.07,
"eval_scitldr_accuracy": 0.49108589951377635,
"eval_scitldr_loss": NaN,
"eval_scitldr_runtime": 2.8246,
"eval_scitldr_samples_per_second": 141.259,
"eval_scitldr_steps_per_second": 3.186,
"step": 1250
},
{
"epoch": 0.07,
"eval_samsum_accuracy": 0.6251537806378348,
"eval_samsum_loss": 1.3818359375,
"eval_samsum_runtime": 13.864,
"eval_samsum_samples_per_second": 212.565,
"eval_samsum_steps_per_second": 4.472,
"step": 1250
},
{
"epoch": 0.07,
"eval_debate_sum_accuracy": 0.9387001084116295,
"eval_debate_sum_loss": NaN,
"eval_debate_sum_runtime": 243.8994,
"eval_debate_sum_samples_per_second": 197.27,
"eval_debate_sum_steps_per_second": 4.112,
"step": 1250
},
{
"epoch": 0.07,
"eval_billsum_accuracy": 0.6722638786214665,
"eval_billsum_loss": 1.392578125,
"eval_billsum_runtime": 27.2836,
"eval_billsum_samples_per_second": 138.911,
"eval_billsum_steps_per_second": 2.896,
"step": 1250
},
{
"epoch": 0.07,
"eval_wmt2019_zh-en_accuracy": 0.6307350879022818,
"eval_wmt2019_zh-en_loss": 1.67578125,
"eval_wmt2019_zh-en_runtime": 11.4742,
"eval_wmt2019_zh-en_samples_per_second": 346.954,
"eval_wmt2019_zh-en_steps_per_second": 7.234,
"step": 1250
},
{
"epoch": 0.07,
"eval_wmt2019_ru-en_accuracy": 0.7234031766265182,
"eval_wmt2019_ru-en_loss": 1.1044921875,
"eval_wmt2019_ru-en_runtime": 11.1292,
"eval_wmt2019_ru-en_samples_per_second": 269.561,
"eval_wmt2019_ru-en_steps_per_second": 5.661,
"step": 1250
},
{
"epoch": 0.07,
"eval_wmt2019_de-en_accuracy": 0.7347881745809497,
"eval_wmt2019_de-en_loss": 1.0703125,
"eval_wmt2019_de-en_runtime": 6.7145,
"eval_wmt2019_de-en_samples_per_second": 446.495,
"eval_wmt2019_de-en_steps_per_second": 9.383,
"step": 1250
},
{
"epoch": 0.07,
"eval_wmt2019_fr-de_accuracy": 0.7223718008231981,
"eval_wmt2019_fr-de_loss": 1.150390625,
"eval_wmt2019_fr-de_runtime": 5.999,
"eval_wmt2019_fr-de_samples_per_second": 252.041,
"eval_wmt2019_fr-de_steps_per_second": 5.334,
"step": 1250
},
{
"epoch": 0.07,
"eval_essay_instruction_accuracy": 0.5959412245981027,
"eval_essay_instruction_loss": 1.9736328125,
"eval_essay_instruction_runtime": 4.9416,
"eval_essay_instruction_samples_per_second": 83.576,
"eval_essay_instruction_steps_per_second": 1.821,
"step": 1250
},
{
"epoch": 0.07,
"eval_reddit_eli5_accuracy": 0.44527728061091026,
"eval_reddit_eli5_loss": 2.548828125,
"eval_reddit_eli5_runtime": 290.9524,
"eval_reddit_eli5_samples_per_second": 187.409,
"eval_reddit_eli5_steps_per_second": 3.904,
"step": 1250
},
{
"epoch": 0.07,
"eval_reddit_askh_accuracy": 0.44824522875076156,
"eval_reddit_askh_loss": 2.6484375,
"eval_reddit_askh_runtime": 112.213,
"eval_reddit_askh_samples_per_second": 175.604,
"eval_reddit_askh_steps_per_second": 3.663,
"step": 1250
},
{
"epoch": 0.07,
"eval_reddit_asks_accuracy": 0.45689164053284076,
"eval_reddit_asks_loss": 2.498046875,
"eval_reddit_asks_runtime": 165.2712,
"eval_reddit_asks_samples_per_second": 159.471,
"eval_reddit_asks_steps_per_second": 3.328,
"step": 1250
},
{
"epoch": 0.07,
"learning_rate": 4.960942212570877e-06,
"loss": 1.6061,
"step": 1260
},
{
"epoch": 0.07,
"learning_rate": 4.959434189890216e-06,
"loss": 1.6272,
"step": 1270
},
{
"epoch": 0.07,
"learning_rate": 4.957926167209555e-06,
"loss": 1.58,
"step": 1280
},
{
"epoch": 0.08,
"learning_rate": 4.9564181445288936e-06,
"loss": 1.5576,
"step": 1290
},
{
"epoch": 0.08,
"learning_rate": 4.9549101218482336e-06,
"loss": 1.5794,
"step": 1300
},
{
"epoch": 0.08,
"learning_rate": 4.953402099167572e-06,
"loss": 1.5694,
"step": 1310
},
{
"epoch": 0.08,
"learning_rate": 4.95189407648691e-06,
"loss": 1.6599,
"step": 1320
},
{
"epoch": 0.08,
"learning_rate": 4.95038605380625e-06,
"loss": 1.5774,
"step": 1330
},
{
"epoch": 0.08,
"learning_rate": 4.9488780311255885e-06,
"loss": 1.5876,
"step": 1340
},
{
"epoch": 0.08,
"learning_rate": 4.947370008444927e-06,
"loss": 1.6382,
"step": 1350
},
{
"epoch": 0.08,
"learning_rate": 4.945861985764267e-06,
"loss": 1.6248,
"step": 1360
},
{
"epoch": 0.08,
"learning_rate": 4.944353963083605e-06,
"loss": 1.6227,
"step": 1370
},
{
"epoch": 0.08,
"learning_rate": 4.9428459404029434e-06,
"loss": 1.592,
"step": 1380
},
{
"epoch": 0.08,
"learning_rate": 4.9413379177222834e-06,
"loss": 1.6044,
"step": 1390
},
{
"epoch": 0.08,
"learning_rate": 4.939829895041622e-06,
"loss": 1.6386,
"step": 1400
},
{
"epoch": 0.08,
"learning_rate": 4.938321872360961e-06,
"loss": 1.5863,
"step": 1410
},
{
"epoch": 0.08,
"learning_rate": 4.9368138496803e-06,
"loss": 1.6269,
"step": 1420
},
{
"epoch": 0.08,
"learning_rate": 4.935305826999638e-06,
"loss": 1.6145,
"step": 1430
},
{
"epoch": 0.08,
"learning_rate": 4.9337978043189775e-06,
"loss": 1.5194,
"step": 1440
},
{
"epoch": 0.08,
"learning_rate": 4.932289781638317e-06,
"loss": 1.5841,
"step": 1450
},
{
"epoch": 0.09,
"learning_rate": 4.930781758957655e-06,
"loss": 1.5677,
"step": 1460
},
{
"epoch": 0.09,
"learning_rate": 4.929273736276994e-06,
"loss": 1.6513,
"step": 1470
},
{
"epoch": 0.09,
"learning_rate": 4.927765713596333e-06,
"loss": 1.5514,
"step": 1480
},
{
"epoch": 0.09,
"learning_rate": 4.926257690915672e-06,
"loss": 1.6071,
"step": 1490
},
{
"epoch": 0.09,
"learning_rate": 4.924749668235011e-06,
"loss": 1.5484,
"step": 1500
},
{
"epoch": 0.09,
"eval_gsm8k_hard_accuracy": 0.9160661210862749,
"eval_gsm8k_hard_loss": 0.36669921875,
"eval_gsm8k_hard_runtime": 2.426,
"eval_gsm8k_hard_samples_per_second": 108.822,
"eval_gsm8k_hard_steps_per_second": 2.473,
"step": 1500
},
{
"epoch": 0.09,
"eval_webgpt_accuracy": 0.4889121259300561,
"eval_webgpt_loss": 2.275390625,
"eval_webgpt_runtime": 16.7849,
"eval_webgpt_samples_per_second": 233.305,
"eval_webgpt_steps_per_second": 4.885,
"step": 1500
},
{
"epoch": 0.09,
"eval_squad_v2_accuracy": 0.8985118166828732,
"eval_squad_v2_loss": 0.3203125,
"eval_squad_v2_runtime": 89.0026,
"eval_squad_v2_samples_per_second": 292.845,
"eval_squad_v2_steps_per_second": 6.101,
"step": 1500
},
{
"epoch": 0.09,
"eval_adversarial_qa_accuracy": 0.8153786454135697,
"eval_adversarial_qa_loss": 0.80224609375,
"eval_adversarial_qa_runtime": 21.5777,
"eval_adversarial_qa_samples_per_second": 278.064,
"eval_adversarial_qa_steps_per_second": 5.793,
"step": 1500
},
{
"epoch": 0.09,
"eval_private_tuning_accuracy": 0.6673820897933299,
"eval_private_tuning_loss": 1.2412109375,
"eval_private_tuning_runtime": 61.4345,
"eval_private_tuning_samples_per_second": 344.725,
"eval_private_tuning_steps_per_second": 7.195,
"step": 1500
},
{
"epoch": 0.09,
"eval_oa_translated_accuracy": 0.694931804897923,
"eval_oa_translated_loss": 1.259765625,
"eval_oa_translated_runtime": 747.634,
"eval_oa_translated_samples_per_second": 187.422,
"eval_oa_translated_steps_per_second": 3.906,
"step": 1500
},
{
"epoch": 0.09,
"eval_prosocial_dialogue_accuracy": 0.5319418402546676,
"eval_prosocial_dialogue_loss": 1.7685546875,
"eval_prosocial_dialogue_runtime": 40.2134,
"eval_prosocial_dialogue_samples_per_second": 670.995,
"eval_prosocial_dialogue_steps_per_second": 14.0,
"step": 1500
},
{
"epoch": 0.09,
"eval_math_qa_accuracy": 0.5807196170064887,
"eval_math_qa_loss": 1.8212890625,
"eval_math_qa_runtime": 19.1438,
"eval_math_qa_samples_per_second": 311.745,
"eval_math_qa_steps_per_second": 6.53,
"step": 1500
},
{
"epoch": 0.09,
"eval_wikihow_accuracy": 0.6014214394674803,
"eval_wikihow_loss": 1.896484375,
"eval_wikihow_runtime": 7.4397,
"eval_wikihow_samples_per_second": 308.21,
"eval_wikihow_steps_per_second": 6.452,
"step": 1500
},
{
"epoch": 0.09,
"eval_joke_accuracy": 0.4866376042456406,
"eval_joke_loss": 2.24609375,
"eval_joke_runtime": 0.5334,
"eval_joke_samples_per_second": 142.487,
"eval_joke_steps_per_second": 3.75,
"step": 1500
},
{
"epoch": 0.09,
"eval_gsm8k_accuracy": 0.7673556205482711,
"eval_gsm8k_loss": 0.87646484375,
"eval_gsm8k_runtime": 6.0291,
"eval_gsm8k_samples_per_second": 247.963,
"eval_gsm8k_steps_per_second": 5.308,
"step": 1500
},
{
"epoch": 0.09,
"eval_ted_trans_en-hi_accuracy": 0.6494347770862441,
"eval_ted_trans_en-hi_loss": 1.390625,
"eval_ted_trans_en-hi_runtime": 1.0351,
"eval_ted_trans_en-hi_samples_per_second": 99.508,
"eval_ted_trans_en-hi_steps_per_second": 2.898,
"step": 1500
},
{
"epoch": 0.09,
"eval_ted_trans_de-ja_accuracy": 0.616848081514485,
"eval_ted_trans_de-ja_loss": 1.677734375,
"eval_ted_trans_de-ja_runtime": 3.7491,
"eval_ted_trans_de-ja_samples_per_second": 191.511,
"eval_ted_trans_de-ja_steps_per_second": 4.001,
"step": 1500
},
{
"epoch": 0.09,
"eval_ted_trans_nl-en_accuracy": 0.727337334175616,
"eval_ted_trans_nl-en_loss": 1.2138671875,
"eval_ted_trans_nl-en_runtime": 3.6692,
"eval_ted_trans_nl-en_samples_per_second": 210.129,
"eval_ted_trans_nl-en_steps_per_second": 4.633,
"step": 1500
},
{
"epoch": 0.09,
"eval_ted_trans_en-ja_accuracy": 0.626860854480171,
"eval_ted_trans_en-ja_loss": 1.591796875,
"eval_ted_trans_en-ja_runtime": 4.8535,
"eval_ted_trans_en-ja_samples_per_second": 165.036,
"eval_ted_trans_en-ja_steps_per_second": 3.503,
"step": 1500
},
{
"epoch": 0.09,
"eval_ted_trans_en-es_accuracy": 0.7669356066372355,
"eval_ted_trans_en-es_loss": 0.98388671875,
"eval_ted_trans_en-es_runtime": 4.1425,
"eval_ted_trans_en-es_samples_per_second": 199.395,
"eval_ted_trans_en-es_steps_per_second": 4.345,
"step": 1500
},
{
"epoch": 0.09,
"eval_ted_trans_en-ms_accuracy": 0.6616502335236119,
"eval_ted_trans_en-ms_loss": 1.5517578125,
"eval_ted_trans_en-ms_runtime": 0.9865,
"eval_ted_trans_en-ms_samples_per_second": 42.573,
"eval_ted_trans_en-ms_steps_per_second": 1.014,
"step": 1500
},
{
"epoch": 0.09,
"eval_xsum_accuracy": 0.6076375151351375,
"eval_xsum_loss": NaN,
"eval_xsum_runtime": 192.6396,
"eval_xsum_samples_per_second": 211.841,
"eval_xsum_steps_per_second": 4.418,
"step": 1500
},
{
"epoch": 0.09,
"eval_cnn_dailymail_accuracy": 0.6761071775649035,
"eval_cnn_dailymail_loss": NaN,
"eval_cnn_dailymail_runtime": 278.6917,
"eval_cnn_dailymail_samples_per_second": 206.045,
"eval_cnn_dailymail_steps_per_second": 4.295,
"step": 1500
},
{
"epoch": 0.09,
"eval_multi_news_accuracy": 0.5442969643083958,
"eval_multi_news_loss": NaN,
"eval_multi_news_runtime": 45.8208,
"eval_multi_news_samples_per_second": 196.308,
"eval_multi_news_steps_per_second": 4.103,
"step": 1500
},
{
"epoch": 0.09,
"eval_tldr_news_accuracy": 0.5842189410326343,
"eval_tldr_news_loss": 1.8740234375,
"eval_tldr_news_runtime": 3.8029,
"eval_tldr_news_samples_per_second": 375.502,
"eval_tldr_news_steps_per_second": 7.889,
"step": 1500
},
{
"epoch": 0.09,
"eval_scitldr_accuracy": 0.49108589951377635,
"eval_scitldr_loss": NaN,
"eval_scitldr_runtime": 2.7953,
"eval_scitldr_samples_per_second": 142.738,
"eval_scitldr_steps_per_second": 3.22,
"step": 1500
},
{
"epoch": 0.09,
"eval_samsum_accuracy": 0.6296691857399721,
"eval_samsum_loss": 1.359375,
"eval_samsum_runtime": 14.6559,
"eval_samsum_samples_per_second": 201.08,
"eval_samsum_steps_per_second": 4.23,
"step": 1500
},
{
"epoch": 0.09,
"eval_debate_sum_accuracy": 0.9376602666300555,
"eval_debate_sum_loss": NaN,
"eval_debate_sum_runtime": 243.7385,
"eval_debate_sum_samples_per_second": 197.4,
"eval_debate_sum_steps_per_second": 4.115,
"step": 1500
},
{
"epoch": 0.09,
"eval_billsum_accuracy": 0.674664779787867,
"eval_billsum_loss": 1.3837890625,
"eval_billsum_runtime": 28.345,
"eval_billsum_samples_per_second": 133.71,
"eval_billsum_steps_per_second": 2.787,
"step": 1500
},
{
"epoch": 0.09,
"eval_wmt2019_zh-en_accuracy": 0.6330300731414937,
"eval_wmt2019_zh-en_loss": 1.6611328125,
"eval_wmt2019_zh-en_runtime": 11.8726,
"eval_wmt2019_zh-en_samples_per_second": 335.309,
"eval_wmt2019_zh-en_steps_per_second": 6.991,
"step": 1500
},
{
"epoch": 0.09,
"eval_wmt2019_ru-en_accuracy": 0.7280151437129657,
"eval_wmt2019_ru-en_loss": 1.083984375,
"eval_wmt2019_ru-en_runtime": 10.0609,
"eval_wmt2019_ru-en_samples_per_second": 298.183,
"eval_wmt2019_ru-en_steps_per_second": 6.262,
"step": 1500
},
{
"epoch": 0.09,
"eval_wmt2019_de-en_accuracy": 0.7374484938192584,
"eval_wmt2019_de-en_loss": 1.05859375,
"eval_wmt2019_de-en_runtime": 7.6833,
"eval_wmt2019_de-en_samples_per_second": 390.199,
"eval_wmt2019_de-en_steps_per_second": 8.2,
"step": 1500
},
{
"epoch": 0.09,
"eval_wmt2019_fr-de_accuracy": 0.7249404264537492,
"eval_wmt2019_fr-de_loss": 1.1298828125,
"eval_wmt2019_fr-de_runtime": 4.7455,
"eval_wmt2019_fr-de_samples_per_second": 318.618,
"eval_wmt2019_fr-de_steps_per_second": 6.743,
"step": 1500
},
{
"epoch": 0.09,
"eval_essay_instruction_accuracy": 0.5962563297437145,
"eval_essay_instruction_loss": 1.96484375,
"eval_essay_instruction_runtime": 4.8055,
"eval_essay_instruction_samples_per_second": 85.943,
"eval_essay_instruction_steps_per_second": 1.873,
"step": 1500
},
{
"epoch": 0.09,
"eval_reddit_eli5_accuracy": 0.44550306361932773,
"eval_reddit_eli5_loss": 2.546875,
"eval_reddit_eli5_runtime": 268.1996,
"eval_reddit_eli5_samples_per_second": 203.308,
"eval_reddit_eli5_steps_per_second": 4.236,
"step": 1500
},
{
"epoch": 0.09,
"eval_reddit_askh_accuracy": 0.44892602615508864,
"eval_reddit_askh_loss": 2.642578125,
"eval_reddit_askh_runtime": 135.1366,
"eval_reddit_askh_samples_per_second": 145.815,
"eval_reddit_askh_steps_per_second": 3.041,
"step": 1500
},
{
"epoch": 0.09,
"eval_reddit_asks_accuracy": 0.4570882832072159,
"eval_reddit_asks_loss": 2.49609375,
"eval_reddit_asks_runtime": 151.4006,
"eval_reddit_asks_samples_per_second": 174.081,
"eval_reddit_asks_steps_per_second": 3.633,
"step": 1500
},
{
"epoch": 0.09,
"learning_rate": 4.92324164555435e-06,
"loss": 1.5053,
"step": 1510
},
{
"epoch": 0.09,
"learning_rate": 4.921733622873688e-06,
"loss": 1.6214,
"step": 1520
},
{
"epoch": 0.09,
"learning_rate": 4.920225600193027e-06,
"loss": 1.5547,
"step": 1530
},
{
"epoch": 0.09,
"learning_rate": 4.9187175775123666e-06,
"loss": 1.5706,
"step": 1540
},
{
"epoch": 0.09,
"learning_rate": 4.917209554831705e-06,
"loss": 1.6485,
"step": 1550
},
{
"epoch": 0.09,
"learning_rate": 4.915701532151044e-06,
"loss": 1.6188,
"step": 1560
},
{
"epoch": 0.09,
"learning_rate": 4.914193509470383e-06,
"loss": 1.5883,
"step": 1570
},
{
"epoch": 0.09,
"learning_rate": 4.9126854867897215e-06,
"loss": 1.5876,
"step": 1580
},
{
"epoch": 0.09,
"learning_rate": 4.911177464109061e-06,
"loss": 1.5883,
"step": 1590
},
{
"epoch": 0.09,
"learning_rate": 4.9096694414284e-06,
"loss": 1.584,
"step": 1600
},
{
"epoch": 0.09,
"learning_rate": 4.908161418747738e-06,
"loss": 1.6226,
"step": 1610
},
{
"epoch": 0.09,
"learning_rate": 4.906653396067077e-06,
"loss": 1.5241,
"step": 1620
},
{
"epoch": 0.1,
"learning_rate": 4.9051453733864164e-06,
"loss": 1.552,
"step": 1630
},
{
"epoch": 0.1,
"learning_rate": 4.903637350705755e-06,
"loss": 1.6007,
"step": 1640
},
{
"epoch": 0.1,
"learning_rate": 4.902129328025094e-06,
"loss": 1.6413,
"step": 1650
},
{
"epoch": 0.1,
"learning_rate": 4.900621305344433e-06,
"loss": 1.5901,
"step": 1660
},
{
"epoch": 0.1,
"learning_rate": 4.899113282663771e-06,
"loss": 1.618,
"step": 1670
},
{
"epoch": 0.1,
"learning_rate": 4.8976052599831105e-06,
"loss": 1.6042,
"step": 1680
},
{
"epoch": 0.1,
"learning_rate": 4.89609723730245e-06,
"loss": 1.6138,
"step": 1690
},
{
"epoch": 0.1,
"learning_rate": 4.894589214621788e-06,
"loss": 1.5651,
"step": 1700
},
{
"epoch": 0.1,
"learning_rate": 4.893081191941127e-06,
"loss": 1.557,
"step": 1710
},
{
"epoch": 0.1,
"learning_rate": 4.891573169260466e-06,
"loss": 1.589,
"step": 1720
},
{
"epoch": 0.1,
"learning_rate": 4.890065146579805e-06,
"loss": 1.6019,
"step": 1730
},
{
"epoch": 0.1,
"learning_rate": 4.888557123899144e-06,
"loss": 1.5311,
"step": 1740
},
{
"epoch": 0.1,
"learning_rate": 4.887049101218483e-06,
"loss": 1.5996,
"step": 1750
},
{
"epoch": 0.1,
"eval_gsm8k_hard_accuracy": 0.917939008998005,
"eval_gsm8k_hard_loss": 0.353759765625,
"eval_gsm8k_hard_runtime": 2.0742,
"eval_gsm8k_hard_samples_per_second": 127.281,
"eval_gsm8k_hard_steps_per_second": 2.893,
"step": 1750
},
{
"epoch": 0.1,
"eval_webgpt_accuracy": 0.48935223254836624,
"eval_webgpt_loss": 2.275390625,
"eval_webgpt_runtime": 18.9851,
"eval_webgpt_samples_per_second": 206.267,
"eval_webgpt_steps_per_second": 4.319,
"step": 1750
},
{
"epoch": 0.1,
"eval_squad_v2_accuracy": 0.8966656801815133,
"eval_squad_v2_loss": 0.312255859375,
"eval_squad_v2_runtime": 87.2749,
"eval_squad_v2_samples_per_second": 298.642,
"eval_squad_v2_steps_per_second": 6.222,
"step": 1750
},
{
"epoch": 0.1,
"eval_adversarial_qa_accuracy": 0.7900062684695979,
"eval_adversarial_qa_loss": 0.8447265625,
"eval_adversarial_qa_runtime": 21.2806,
"eval_adversarial_qa_samples_per_second": 281.947,
"eval_adversarial_qa_steps_per_second": 5.874,
"step": 1750
},
{
"epoch": 0.1,
"eval_private_tuning_accuracy": 0.6689839396232736,
"eval_private_tuning_loss": 1.2333984375,
"eval_private_tuning_runtime": 65.9372,
"eval_private_tuning_samples_per_second": 321.185,
"eval_private_tuning_steps_per_second": 6.703,
"step": 1750
},
{
"epoch": 0.1,
"eval_oa_translated_accuracy": 0.697770983402211,
"eval_oa_translated_loss": 1.24609375,
"eval_oa_translated_runtime": 707.9067,
"eval_oa_translated_samples_per_second": 197.94,
"eval_oa_translated_steps_per_second": 4.125,
"step": 1750
},
{
"epoch": 0.1,
"eval_prosocial_dialogue_accuracy": 0.5329268895641822,
"eval_prosocial_dialogue_loss": 1.771484375,
"eval_prosocial_dialogue_runtime": 94.875,
"eval_prosocial_dialogue_samples_per_second": 284.406,
"eval_prosocial_dialogue_steps_per_second": 5.934,
"step": 1750
},
{
"epoch": 0.1,
"eval_math_qa_accuracy": 0.5841662474001151,
"eval_math_qa_loss": 1.80078125,
"eval_math_qa_runtime": 18.717,
"eval_math_qa_samples_per_second": 318.854,
"eval_math_qa_steps_per_second": 6.678,
"step": 1750
},
{
"epoch": 0.1,
"eval_wikihow_accuracy": 0.6094855082512828,
"eval_wikihow_loss": 1.8798828125,
"eval_wikihow_runtime": 8.1158,
"eval_wikihow_samples_per_second": 282.535,
"eval_wikihow_steps_per_second": 5.914,
"step": 1750
},
{
"epoch": 0.1,
"eval_joke_accuracy": 0.4837945413191812,
"eval_joke_loss": 2.259765625,
"eval_joke_runtime": 0.8936,
"eval_joke_samples_per_second": 85.053,
"eval_joke_steps_per_second": 2.238,
"step": 1750
},
{
"epoch": 0.1,
"eval_gsm8k_accuracy": 0.7703712514518408,
"eval_gsm8k_loss": 0.86376953125,
"eval_gsm8k_runtime": 5.5568,
"eval_gsm8k_samples_per_second": 269.039,
"eval_gsm8k_steps_per_second": 5.759,
"step": 1750
},
{
"epoch": 0.1,
"eval_ted_trans_en-hi_accuracy": 0.6408450704225352,
"eval_ted_trans_en-hi_loss": 1.4541015625,
"eval_ted_trans_en-hi_runtime": 1.7176,
"eval_ted_trans_en-hi_samples_per_second": 59.968,
"eval_ted_trans_en-hi_steps_per_second": 1.747,
"step": 1750
},
{
"epoch": 0.1,
"eval_ted_trans_de-ja_accuracy": 0.6207718785454686,
"eval_ted_trans_de-ja_loss": 1.6513671875,
"eval_ted_trans_de-ja_runtime": 4.1297,
"eval_ted_trans_de-ja_samples_per_second": 173.862,
"eval_ted_trans_de-ja_steps_per_second": 3.632,
"step": 1750
},
{
"epoch": 0.1,
"eval_ted_trans_nl-en_accuracy": 0.7211525867714473,
"eval_ted_trans_nl-en_loss": 1.2236328125,
"eval_ted_trans_nl-en_runtime": 4.0953,
"eval_ted_trans_nl-en_samples_per_second": 188.265,
"eval_ted_trans_nl-en_steps_per_second": 4.151,
"step": 1750
},
{
"epoch": 0.1,
"eval_ted_trans_en-ja_accuracy": 0.6330966258927838,
"eval_ted_trans_en-ja_loss": 1.5634765625,
"eval_ted_trans_en-ja_runtime": 4.1542,
"eval_ted_trans_en-ja_samples_per_second": 192.817,
"eval_ted_trans_en-ja_steps_per_second": 4.092,
"step": 1750
},
{
"epoch": 0.1,
"eval_ted_trans_en-es_accuracy": 0.7685958860355181,
"eval_ted_trans_en-es_loss": 0.984375,
"eval_ted_trans_en-es_runtime": 4.9915,
"eval_ted_trans_en-es_samples_per_second": 165.482,
"eval_ted_trans_en-es_steps_per_second": 3.606,
"step": 1750
},
{
"epoch": 0.1,
"eval_ted_trans_en-ms_accuracy": 0.6491956408925791,
"eval_ted_trans_en-ms_loss": 1.5751953125,
"eval_ted_trans_en-ms_runtime": 0.4795,
"eval_ted_trans_en-ms_samples_per_second": 87.587,
"eval_ted_trans_en-ms_steps_per_second": 2.085,
"step": 1750
},
{
"epoch": 0.1,
"eval_xsum_accuracy": 0.6072009770109794,
"eval_xsum_loss": NaN,
"eval_xsum_runtime": 194.4193,
"eval_xsum_samples_per_second": 209.902,
"eval_xsum_steps_per_second": 4.377,
"step": 1750
},
{
"epoch": 0.1,
"eval_cnn_dailymail_accuracy": 0.6753130640011107,
"eval_cnn_dailymail_loss": NaN,
"eval_cnn_dailymail_runtime": 277.0946,
"eval_cnn_dailymail_samples_per_second": 207.232,
"eval_cnn_dailymail_steps_per_second": 4.32,
"step": 1750
},
{
"epoch": 0.1,
"eval_multi_news_accuracy": 0.5451066937634955,
"eval_multi_news_loss": NaN,
"eval_multi_news_runtime": 46.0554,
"eval_multi_news_samples_per_second": 195.308,
"eval_multi_news_steps_per_second": 4.082,
"step": 1750
},
{
"epoch": 0.1,
"eval_tldr_news_accuracy": 0.5815386062259005,
"eval_tldr_news_loss": 1.869140625,
"eval_tldr_news_runtime": 3.242,
"eval_tldr_news_samples_per_second": 440.47,
"eval_tldr_news_steps_per_second": 9.254,
"step": 1750
},
{
"epoch": 0.1,
"eval_scitldr_accuracy": 0.49108589951377635,
"eval_scitldr_loss": NaN,
"eval_scitldr_runtime": 2.7516,
"eval_scitldr_samples_per_second": 145.006,
"eval_scitldr_steps_per_second": 3.271,
"step": 1750
},
{
"epoch": 0.1,
"eval_samsum_accuracy": 0.6312103719125579,
"eval_samsum_loss": 1.3564453125,
"eval_samsum_runtime": 13.9794,
"eval_samsum_samples_per_second": 210.81,
"eval_samsum_steps_per_second": 4.435,
"step": 1750
},
{
"epoch": 0.1,
"eval_debate_sum_accuracy": 0.9370392048784827,
"eval_debate_sum_loss": NaN,
"eval_debate_sum_runtime": 244.8227,
"eval_debate_sum_samples_per_second": 196.526,
"eval_debate_sum_steps_per_second": 4.097,
"step": 1750
},
{
"epoch": 0.1,
"eval_billsum_accuracy": 0.6767193885992487,
"eval_billsum_loss": 1.3740234375,
"eval_billsum_runtime": 27.455,
"eval_billsum_samples_per_second": 138.044,
"eval_billsum_steps_per_second": 2.877,
"step": 1750
},
{
"epoch": 0.1,
"eval_wmt2019_zh-en_accuracy": 0.6325426044271557,
"eval_wmt2019_zh-en_loss": 1.658203125,
"eval_wmt2019_zh-en_runtime": 12.0617,
"eval_wmt2019_zh-en_samples_per_second": 330.053,
"eval_wmt2019_zh-en_steps_per_second": 6.881,
"step": 1750
},
{
"epoch": 0.1,
"eval_wmt2019_ru-en_accuracy": 0.7264274688562131,
"eval_wmt2019_ru-en_loss": 1.0927734375,
"eval_wmt2019_ru-en_runtime": 10.931,
"eval_wmt2019_ru-en_samples_per_second": 274.449,
"eval_wmt2019_ru-en_steps_per_second": 5.763,
"step": 1750
},
{
"epoch": 0.1,
"eval_wmt2019_de-en_accuracy": 0.7404288514621754,
"eval_wmt2019_de-en_loss": 1.0498046875,
"eval_wmt2019_de-en_runtime": 7.1074,
"eval_wmt2019_de-en_samples_per_second": 421.813,
"eval_wmt2019_de-en_steps_per_second": 8.864,
"step": 1750
},
{
"epoch": 0.1,
"eval_wmt2019_fr-de_accuracy": 0.7267868761037272,
"eval_wmt2019_fr-de_loss": 1.1259765625,
"eval_wmt2019_fr-de_runtime": 6.3509,
"eval_wmt2019_fr-de_samples_per_second": 238.077,
"eval_wmt2019_fr-de_steps_per_second": 5.039,
"step": 1750
},
{
"epoch": 0.1,
"eval_essay_instruction_accuracy": 0.5968423147513433,
"eval_essay_instruction_loss": 1.9580078125,
"eval_essay_instruction_runtime": 4.8143,
"eval_essay_instruction_samples_per_second": 85.786,
"eval_essay_instruction_steps_per_second": 1.869,
"step": 1750
},
{
"epoch": 0.1,
"eval_reddit_eli5_accuracy": 0.4457660851447585,
"eval_reddit_eli5_loss": 2.54296875,
"eval_reddit_eli5_runtime": 291.6459,
"eval_reddit_eli5_samples_per_second": 186.963,
"eval_reddit_eli5_steps_per_second": 3.895,
"step": 1750
},
{
"epoch": 0.1,
"eval_reddit_askh_accuracy": 0.4493032630102438,
"eval_reddit_askh_loss": 2.640625,
"eval_reddit_askh_runtime": 111.9151,
"eval_reddit_askh_samples_per_second": 176.071,
"eval_reddit_askh_steps_per_second": 3.672,
"step": 1750
},
{
"epoch": 0.1,
"eval_reddit_asks_accuracy": 0.4576012073512167,
"eval_reddit_asks_loss": 2.4921875,
"eval_reddit_asks_runtime": 151.9908,
"eval_reddit_asks_samples_per_second": 173.405,
"eval_reddit_asks_steps_per_second": 3.619,
"step": 1750
},
{
"epoch": 0.1,
"learning_rate": 4.885541078537821e-06,
"loss": 1.5723,
"step": 1760
},
{
"epoch": 0.1,
"learning_rate": 4.88403305585716e-06,
"loss": 1.5749,
"step": 1770
},
{
"epoch": 0.1,
"learning_rate": 4.8825250331764996e-06,
"loss": 1.6074,
"step": 1780
},
{
"epoch": 0.1,
"learning_rate": 4.881167812763905e-06,
"loss": 1.611,
"step": 1790
},
{
"epoch": 0.11,
"learning_rate": 4.879659790083243e-06,
"loss": 1.5503,
"step": 1800
},
{
"epoch": 0.11,
"learning_rate": 4.878151767402582e-06,
"loss": 1.5197,
"step": 1810
},
{
"epoch": 0.11,
"learning_rate": 4.876643744721921e-06,
"loss": 1.6165,
"step": 1820
},
{
"epoch": 0.11,
"learning_rate": 4.87513572204126e-06,
"loss": 1.5622,
"step": 1830
},
{
"epoch": 0.11,
"learning_rate": 4.873627699360599e-06,
"loss": 1.6142,
"step": 1840
},
{
"epoch": 0.11,
"learning_rate": 4.872119676679938e-06,
"loss": 1.5699,
"step": 1850
},
{
"epoch": 0.11,
"learning_rate": 4.870611653999276e-06,
"loss": 1.5571,
"step": 1860
},
{
"epoch": 0.11,
"learning_rate": 4.8691036313186155e-06,
"loss": 1.7093,
"step": 1870
},
{
"epoch": 0.11,
"learning_rate": 4.867595608637955e-06,
"loss": 1.5411,
"step": 1880
},
{
"epoch": 0.11,
"learning_rate": 4.866087585957293e-06,
"loss": 1.6657,
"step": 1890
},
{
"epoch": 0.11,
"learning_rate": 4.864579563276632e-06,
"loss": 1.6215,
"step": 1900
},
{
"epoch": 0.11,
"learning_rate": 4.863071540595971e-06,
"loss": 1.6317,
"step": 1910
},
{
"epoch": 0.11,
"learning_rate": 4.86156351791531e-06,
"loss": 1.573,
"step": 1920
},
{
"epoch": 0.11,
"learning_rate": 4.860055495234649e-06,
"loss": 1.5732,
"step": 1930
},
{
"epoch": 0.11,
"learning_rate": 4.858547472553988e-06,
"loss": 1.5991,
"step": 1940
},
{
"epoch": 0.11,
"learning_rate": 4.857039449873326e-06,
"loss": 1.6105,
"step": 1950
},
{
"epoch": 0.11,
"learning_rate": 4.855531427192665e-06,
"loss": 1.627,
"step": 1960
},
{
"epoch": 0.12,
"learning_rate": 4.8540234045120045e-06,
"loss": 1.59,
"step": 1970
},
{
"epoch": 0.12,
"learning_rate": 4.852515381831343e-06,
"loss": 1.5923,
"step": 1980
},
{
"epoch": 0.12,
"learning_rate": 4.851007359150682e-06,
"loss": 1.5461,
"step": 1990
},
{
"epoch": 0.12,
"learning_rate": 4.849499336470021e-06,
"loss": 1.5792,
"step": 2000
},
{
"epoch": 0.12,
"eval_gsm8k_hard_accuracy": 0.9194658197956109,
"eval_gsm8k_hard_loss": 0.34375,
"eval_gsm8k_hard_runtime": 2.5861,
"eval_gsm8k_hard_samples_per_second": 102.084,
"eval_gsm8k_hard_steps_per_second": 2.32,
"step": 2000
},
{
"epoch": 0.12,
"eval_webgpt_accuracy": 0.48958552214679374,
"eval_webgpt_loss": 2.26953125,
"eval_webgpt_runtime": 18.454,
"eval_webgpt_samples_per_second": 212.203,
"eval_webgpt_steps_per_second": 4.443,
"step": 2000
},
{
"epoch": 0.12,
"eval_squad_v2_accuracy": 0.902612776392001,
"eval_squad_v2_loss": 0.312744140625,
"eval_squad_v2_runtime": 87.2414,
"eval_squad_v2_samples_per_second": 298.757,
"eval_squad_v2_steps_per_second": 6.224,
"step": 2000
},
{
"epoch": 0.12,
"eval_adversarial_qa_accuracy": 0.8242739022715739,
"eval_adversarial_qa_loss": 0.765625,
"eval_adversarial_qa_runtime": 21.8127,
"eval_adversarial_qa_samples_per_second": 275.069,
"eval_adversarial_qa_steps_per_second": 5.731,
"step": 2000
},
{
"epoch": 0.12,
"eval_private_tuning_accuracy": 0.6703786408057409,
"eval_private_tuning_loss": 1.2275390625,
"eval_private_tuning_runtime": 61.3818,
"eval_private_tuning_samples_per_second": 345.021,
"eval_private_tuning_steps_per_second": 7.201,
"step": 2000
},
{
"epoch": 0.12,
"eval_oa_translated_accuracy": 0.6998150442485734,
"eval_oa_translated_loss": 1.2353515625,
"eval_oa_translated_runtime": 717.5723,
"eval_oa_translated_samples_per_second": 195.274,
"eval_oa_translated_steps_per_second": 4.069,
"step": 2000
},
{
"epoch": 0.12,
"eval_prosocial_dialogue_accuracy": 0.5407654479692391,
"eval_prosocial_dialogue_loss": 1.75390625,
"eval_prosocial_dialogue_runtime": 73.7989,
"eval_prosocial_dialogue_samples_per_second": 365.629,
"eval_prosocial_dialogue_steps_per_second": 7.629,
"step": 2000
},
{
"epoch": 0.12,
"eval_math_qa_accuracy": 0.5880693164312722,
"eval_math_qa_loss": 1.77734375,
"eval_math_qa_runtime": 18.7243,
"eval_math_qa_samples_per_second": 318.731,
"eval_math_qa_steps_per_second": 6.676,
"step": 2000
},
{
"epoch": 0.12,
"eval_wikihow_accuracy": 0.6112120371654417,
"eval_wikihow_loss": 1.8701171875,
"eval_wikihow_runtime": 8.0237,
"eval_wikihow_samples_per_second": 285.777,
"eval_wikihow_steps_per_second": 5.982,
"step": 2000
},
{
"epoch": 0.12,
"eval_joke_accuracy": 0.4860689916603487,
"eval_joke_loss": 2.2421875,
"eval_joke_runtime": 0.9634,
"eval_joke_samples_per_second": 78.886,
"eval_joke_steps_per_second": 2.076,
"step": 2000
},
{
"epoch": 0.12,
"eval_gsm8k_accuracy": 0.7713298162634165,
"eval_gsm8k_loss": 0.857421875,
"eval_gsm8k_runtime": 6.4806,
"eval_gsm8k_samples_per_second": 230.688,
"eval_gsm8k_steps_per_second": 4.938,
"step": 2000
},
{
"epoch": 0.12,
"eval_ted_trans_en-hi_accuracy": 0.6369557351344832,
"eval_ted_trans_en-hi_loss": 1.482421875,
"eval_ted_trans_en-hi_runtime": 1.0804,
"eval_ted_trans_en-hi_samples_per_second": 95.332,
"eval_ted_trans_en-hi_steps_per_second": 2.777,
"step": 2000
},
{
"epoch": 0.12,
"eval_ted_trans_de-ja_accuracy": 0.6203946747238221,
"eval_ted_trans_de-ja_loss": 1.6611328125,
"eval_ted_trans_de-ja_runtime": 4.4636,
"eval_ted_trans_de-ja_samples_per_second": 160.856,
"eval_ted_trans_de-ja_steps_per_second": 3.36,
"step": 2000
},
{
"epoch": 0.12,
"eval_ted_trans_nl-en_accuracy": 0.7250118302749882,
"eval_ted_trans_nl-en_loss": 1.2099609375,
"eval_ted_trans_nl-en_runtime": 3.3901,
"eval_ted_trans_nl-en_samples_per_second": 227.426,
"eval_ted_trans_nl-en_steps_per_second": 5.015,
"step": 2000
},
{
"epoch": 0.12,
"eval_ted_trans_en-ja_accuracy": 0.6299969453212504,
"eval_ted_trans_en-ja_loss": 1.5703125,
"eval_ted_trans_en-ja_runtime": 3.888,
"eval_ted_trans_en-ja_samples_per_second": 206.017,
"eval_ted_trans_en-ja_steps_per_second": 4.372,
"step": 2000
},
{
"epoch": 0.12,
"eval_ted_trans_en-es_accuracy": 0.7667636252296387,
"eval_ted_trans_en-es_loss": 0.9814453125,
"eval_ted_trans_en-es_runtime": 4.4116,
"eval_ted_trans_en-es_samples_per_second": 187.233,
"eval_ted_trans_en-es_steps_per_second": 4.08,
"step": 2000
},
{
"epoch": 0.12,
"eval_ted_trans_en-ms_accuracy": 0.6590555267254801,
"eval_ted_trans_en-ms_loss": 1.509765625,
"eval_ted_trans_en-ms_runtime": 1.256,
"eval_ted_trans_en-ms_samples_per_second": 33.438,
"eval_ted_trans_en-ms_steps_per_second": 0.796,
"step": 2000
},
{
"epoch": 0.12,
"eval_xsum_accuracy": 0.6091027172290235,
"eval_xsum_loss": NaN,
"eval_xsum_runtime": 192.154,
"eval_xsum_samples_per_second": 212.377,
"eval_xsum_steps_per_second": 4.429,
"step": 2000
},
{
"epoch": 0.12,
"eval_cnn_dailymail_accuracy": 0.6791855245499561,
"eval_cnn_dailymail_loss": NaN,
"eval_cnn_dailymail_runtime": 278.657,
"eval_cnn_dailymail_samples_per_second": 206.071,
"eval_cnn_dailymail_steps_per_second": 4.296,
"step": 2000
},
{
"epoch": 0.12,
"eval_multi_news_accuracy": 0.547242906816125,
"eval_multi_news_loss": NaN,
"eval_multi_news_runtime": 46.0885,
"eval_multi_news_samples_per_second": 195.168,
"eval_multi_news_steps_per_second": 4.079,
"step": 2000
},
{
"epoch": 0.12,
"eval_tldr_news_accuracy": 0.5899087745697358,
"eval_tldr_news_loss": 1.82421875,
"eval_tldr_news_runtime": 3.2945,
"eval_tldr_news_samples_per_second": 433.455,
"eval_tldr_news_steps_per_second": 9.106,
"step": 2000
},
{
"epoch": 0.12,
"eval_scitldr_accuracy": 0.48946515397082657,
"eval_scitldr_loss": NaN,
"eval_scitldr_runtime": 2.999,
"eval_scitldr_samples_per_second": 133.043,
"eval_scitldr_steps_per_second": 3.001,
"step": 2000
},
{
"epoch": 0.12,
"eval_samsum_accuracy": 0.6357663345455529,
"eval_samsum_loss": 1.3447265625,
"eval_samsum_runtime": 13.9699,
"eval_samsum_samples_per_second": 210.954,
"eval_samsum_steps_per_second": 4.438,
"step": 2000
},
{
"epoch": 0.12,
"eval_debate_sum_accuracy": 0.937797152052083,
"eval_debate_sum_loss": NaN,
"eval_debate_sum_runtime": 251.6686,
"eval_debate_sum_samples_per_second": 191.18,
"eval_debate_sum_steps_per_second": 3.985,
"step": 2000
},
{
"epoch": 0.12,
"eval_billsum_accuracy": 0.6771982567431513,
"eval_billsum_loss": 1.3671875,
"eval_billsum_runtime": 20.1117,
"eval_billsum_samples_per_second": 188.448,
"eval_billsum_steps_per_second": 3.928,
"step": 2000
},
{
"epoch": 0.12,
"eval_wmt2019_zh-en_accuracy": 0.6312172508742782,
"eval_wmt2019_zh-en_loss": 1.6611328125,
"eval_wmt2019_zh-en_runtime": 12.8125,
"eval_wmt2019_zh-en_samples_per_second": 310.712,
"eval_wmt2019_zh-en_steps_per_second": 6.478,
"step": 2000
},
{
"epoch": 0.12,
"eval_wmt2019_ru-en_accuracy": 0.7286970006594347,
"eval_wmt2019_ru-en_loss": 1.0859375,
"eval_wmt2019_ru-en_runtime": 10.0956,
"eval_wmt2019_ru-en_samples_per_second": 297.16,
"eval_wmt2019_ru-en_steps_per_second": 6.24,
"step": 2000
},
{
"epoch": 0.12,
"eval_wmt2019_de-en_accuracy": 0.7417590110813298,
"eval_wmt2019_de-en_loss": 1.044921875,
"eval_wmt2019_de-en_runtime": 7.9617,
"eval_wmt2019_de-en_samples_per_second": 376.551,
"eval_wmt2019_de-en_steps_per_second": 7.913,
"step": 2000
},
{
"epoch": 0.12,
"eval_wmt2019_fr-de_accuracy": 0.7250758560901603,
"eval_wmt2019_fr-de_loss": 1.130859375,
"eval_wmt2019_fr-de_runtime": 4.4498,
"eval_wmt2019_fr-de_samples_per_second": 339.792,
"eval_wmt2019_fr-de_steps_per_second": 7.191,
"step": 2000
},
{
"epoch": 0.12,
"eval_essay_instruction_accuracy": 0.5974504124007696,
"eval_essay_instruction_loss": 1.953125,
"eval_essay_instruction_runtime": 5.4069,
"eval_essay_instruction_samples_per_second": 76.383,
"eval_essay_instruction_steps_per_second": 1.665,
"step": 2000
},
{
"epoch": 0.12,
"eval_reddit_eli5_accuracy": 0.44636360001599296,
"eval_reddit_eli5_loss": 2.54296875,
"eval_reddit_eli5_runtime": 271.6541,
"eval_reddit_eli5_samples_per_second": 200.722,
"eval_reddit_eli5_steps_per_second": 4.182,
"step": 2000
},
{
"epoch": 0.12,
"eval_reddit_askh_accuracy": 0.449544836282747,
"eval_reddit_askh_loss": 2.638671875,
"eval_reddit_askh_runtime": 133.9706,
"eval_reddit_askh_samples_per_second": 147.084,
"eval_reddit_askh_steps_per_second": 3.068,
"step": 2000
},
{
"epoch": 0.12,
"eval_reddit_asks_accuracy": 0.4580279915626371,
"eval_reddit_asks_loss": 2.490234375,
"eval_reddit_asks_runtime": 148.6473,
"eval_reddit_asks_samples_per_second": 177.306,
"eval_reddit_asks_steps_per_second": 3.7,
"step": 2000
},
{
"epoch": 0.12,
"learning_rate": 4.8479913137893595e-06,
"loss": 1.5467,
"step": 2010
},
{
"epoch": 0.12,
"learning_rate": 4.846483291108699e-06,
"loss": 1.6584,
"step": 2020
},
{
"epoch": 0.12,
"learning_rate": 4.844975268428038e-06,
"loss": 1.5414,
"step": 2030
},
{
"epoch": 0.12,
"learning_rate": 4.843467245747376e-06,
"loss": 1.5565,
"step": 2040
},
{
"epoch": 0.12,
"learning_rate": 4.841959223066715e-06,
"loss": 1.5377,
"step": 2050
},
{
"epoch": 0.12,
"learning_rate": 4.840451200386054e-06,
"loss": 1.5815,
"step": 2060
},
{
"epoch": 0.12,
"learning_rate": 4.838943177705393e-06,
"loss": 1.5763,
"step": 2070
},
{
"epoch": 0.12,
"learning_rate": 4.837435155024732e-06,
"loss": 1.5399,
"step": 2080
},
{
"epoch": 0.12,
"learning_rate": 4.835927132344071e-06,
"loss": 1.5486,
"step": 2090
},
{
"epoch": 0.12,
"learning_rate": 4.834419109663409e-06,
"loss": 1.5657,
"step": 2100
},
{
"epoch": 0.12,
"learning_rate": 4.8329110869827485e-06,
"loss": 1.5551,
"step": 2110
},
{
"epoch": 0.12,
"learning_rate": 4.831403064302088e-06,
"loss": 1.5926,
"step": 2120
},
{
"epoch": 0.12,
"learning_rate": 4.829895041621426e-06,
"loss": 1.5123,
"step": 2130
},
{
"epoch": 0.13,
"learning_rate": 4.828387018940765e-06,
"loss": 1.6461,
"step": 2140
},
{
"epoch": 0.13,
"learning_rate": 4.826878996260104e-06,
"loss": 1.5276,
"step": 2150
},
{
"epoch": 0.13,
"learning_rate": 4.825370973579443e-06,
"loss": 1.6597,
"step": 2160
},
{
"epoch": 0.13,
"learning_rate": 4.823862950898782e-06,
"loss": 1.5458,
"step": 2170
},
{
"epoch": 0.13,
"learning_rate": 4.822354928218121e-06,
"loss": 1.565,
"step": 2180
},
{
"epoch": 0.13,
"learning_rate": 4.820846905537459e-06,
"loss": 1.5876,
"step": 2190
},
{
"epoch": 0.13,
"learning_rate": 4.819338882856798e-06,
"loss": 1.5786,
"step": 2200
},
{
"epoch": 0.13,
"learning_rate": 4.8178308601761375e-06,
"loss": 1.5501,
"step": 2210
},
{
"epoch": 0.13,
"learning_rate": 4.816322837495476e-06,
"loss": 1.5303,
"step": 2220
},
{
"epoch": 0.13,
"learning_rate": 4.814814814814815e-06,
"loss": 1.6207,
"step": 2230
},
{
"epoch": 0.13,
"learning_rate": 4.813306792134154e-06,
"loss": 1.5776,
"step": 2240
},
{
"epoch": 0.13,
"learning_rate": 4.811798769453493e-06,
"loss": 1.6652,
"step": 2250
},
{
"epoch": 0.13,
"eval_gsm8k_hard_accuracy": 0.9206872684336957,
"eval_gsm8k_hard_loss": 0.338134765625,
"eval_gsm8k_hard_runtime": 3.5208,
"eval_gsm8k_hard_samples_per_second": 74.983,
"eval_gsm8k_hard_steps_per_second": 1.704,
"step": 2250
},
{
"epoch": 0.13,
"eval_webgpt_accuracy": 0.48976586658813137,
"eval_webgpt_loss": 2.265625,
"eval_webgpt_runtime": 18.5149,
"eval_webgpt_samples_per_second": 211.505,
"eval_webgpt_steps_per_second": 4.429,
"step": 2250
},
{
"epoch": 0.13,
"eval_squad_v2_accuracy": 0.906819430938993,
"eval_squad_v2_loss": 0.292724609375,
"eval_squad_v2_runtime": 87.4932,
"eval_squad_v2_samples_per_second": 297.897,
"eval_squad_v2_steps_per_second": 6.206,
"step": 2250
},
{
"epoch": 0.13,
"eval_adversarial_qa_accuracy": 0.8013193636010866,
"eval_adversarial_qa_loss": 0.8310546875,
"eval_adversarial_qa_runtime": 21.1349,
"eval_adversarial_qa_samples_per_second": 283.891,
"eval_adversarial_qa_steps_per_second": 5.914,
"step": 2250
},
{
"epoch": 0.13,
"eval_private_tuning_accuracy": 0.6708063990236344,
"eval_private_tuning_loss": 1.2236328125,
"eval_private_tuning_runtime": 65.2825,
"eval_private_tuning_samples_per_second": 324.405,
"eval_private_tuning_steps_per_second": 6.771,
"step": 2250
},
{
"epoch": 0.13,
"eval_oa_translated_accuracy": 0.7019998653150882,
"eval_oa_translated_loss": 1.2255859375,
"eval_oa_translated_runtime": 721.6165,
"eval_oa_translated_samples_per_second": 194.179,
"eval_oa_translated_steps_per_second": 4.046,
"step": 2250
},
{
"epoch": 0.13,
"eval_prosocial_dialogue_accuracy": 0.5347828898075921,
"eval_prosocial_dialogue_loss": 1.7451171875,
"eval_prosocial_dialogue_runtime": 75.1624,
"eval_prosocial_dialogue_samples_per_second": 358.996,
"eval_prosocial_dialogue_steps_per_second": 7.49,
"step": 2250
},
{
"epoch": 0.13,
"eval_math_qa_accuracy": 0.5915055732195003,
"eval_math_qa_loss": 1.7578125,
"eval_math_qa_runtime": 18.6859,
"eval_math_qa_samples_per_second": 319.384,
"eval_math_qa_steps_per_second": 6.69,
"step": 2250
},
{
"epoch": 0.13,
"eval_wikihow_accuracy": 0.6088822632089863,
"eval_wikihow_loss": 1.8623046875,
"eval_wikihow_runtime": 7.0536,
"eval_wikihow_samples_per_second": 325.084,
"eval_wikihow_steps_per_second": 6.805,
"step": 2250
},
{
"epoch": 0.13,
"eval_joke_accuracy": 0.4927975739196361,
"eval_joke_loss": 2.203125,
"eval_joke_runtime": 1.8797,
"eval_joke_samples_per_second": 40.431,
"eval_joke_steps_per_second": 1.064,
"step": 2250
},
{
"epoch": 0.13,
"eval_gsm8k_accuracy": 0.7729670729488812,
"eval_gsm8k_loss": 0.84765625,
"eval_gsm8k_runtime": 5.4471,
"eval_gsm8k_samples_per_second": 274.458,
"eval_gsm8k_steps_per_second": 5.875,
"step": 2250
},
{
"epoch": 0.13,
"eval_ted_trans_en-hi_accuracy": 0.6606576107655199,
"eval_ted_trans_en-hi_loss": 1.3681640625,
"eval_ted_trans_en-hi_runtime": 1.9419,
"eval_ted_trans_en-hi_samples_per_second": 53.04,
"eval_ted_trans_en-hi_steps_per_second": 1.545,
"step": 2250
},
{
"epoch": 0.13,
"eval_ted_trans_de-ja_accuracy": 0.6181494047961354,
"eval_ted_trans_de-ja_loss": 1.669921875,
"eval_ted_trans_de-ja_runtime": 4.4657,
"eval_ted_trans_de-ja_samples_per_second": 160.781,
"eval_ted_trans_de-ja_steps_per_second": 3.359,
"step": 2250
},
{
"epoch": 0.13,
"eval_ted_trans_nl-en_accuracy": 0.7275129478913435,
"eval_ted_trans_nl-en_loss": 1.193359375,
"eval_ted_trans_nl-en_runtime": 3.5649,
"eval_ted_trans_nl-en_samples_per_second": 216.275,
"eval_ted_trans_nl-en_steps_per_second": 4.769,
"step": 2250
},
{
"epoch": 0.13,
"eval_ted_trans_en-ja_accuracy": 0.6332216902623955,
"eval_ted_trans_en-ja_loss": 1.5615234375,
"eval_ted_trans_en-ja_runtime": 4.3033,
"eval_ted_trans_en-ja_samples_per_second": 186.138,
"eval_ted_trans_en-ja_steps_per_second": 3.95,
"step": 2250
},
{
"epoch": 0.13,
"eval_ted_trans_en-es_accuracy": 0.7696410515672396,
"eval_ted_trans_en-es_loss": 0.96875,
"eval_ted_trans_en-es_runtime": 5.0224,
"eval_ted_trans_en-es_samples_per_second": 164.462,
"eval_ted_trans_en-es_steps_per_second": 3.584,
"step": 2250
},
{
"epoch": 0.13,
"eval_ted_trans_en-ms_accuracy": 0.6727053140096618,
"eval_ted_trans_en-ms_loss": 1.4013671875,
"eval_ted_trans_en-ms_runtime": 0.4904,
"eval_ted_trans_en-ms_samples_per_second": 85.65,
"eval_ted_trans_en-ms_steps_per_second": 2.039,
"step": 2250
},
{
"epoch": 0.13,
"eval_xsum_accuracy": 0.6095022536611729,
"eval_xsum_loss": NaN,
"eval_xsum_runtime": 191.8464,
"eval_xsum_samples_per_second": 212.717,
"eval_xsum_steps_per_second": 4.436,
"step": 2250
},
{
"epoch": 0.13,
"eval_cnn_dailymail_accuracy": 0.6799426165023833,
"eval_cnn_dailymail_loss": NaN,
"eval_cnn_dailymail_runtime": 276.5156,
"eval_cnn_dailymail_samples_per_second": 207.666,
"eval_cnn_dailymail_steps_per_second": 4.329,
"step": 2250
},
{
"epoch": 0.13,
"eval_multi_news_accuracy": 0.547789914899022,
"eval_multi_news_loss": NaN,
"eval_multi_news_runtime": 46.7011,
"eval_multi_news_samples_per_second": 192.608,
"eval_multi_news_steps_per_second": 4.026,
"step": 2250
},
{
"epoch": 0.13,
"eval_tldr_news_accuracy": 0.586805229004044,
"eval_tldr_news_loss": 1.861328125,
"eval_tldr_news_runtime": 4.1288,
"eval_tldr_news_samples_per_second": 345.86,
"eval_tldr_news_steps_per_second": 7.266,
"step": 2250
},
{
"epoch": 0.13,
"eval_scitldr_accuracy": 0.49270664505672607,
"eval_scitldr_loss": NaN,
"eval_scitldr_runtime": 2.9031,
"eval_scitldr_samples_per_second": 137.437,
"eval_scitldr_steps_per_second": 3.1,
"step": 2250
},
{
"epoch": 0.13,
"eval_samsum_accuracy": 0.6352255674674526,
"eval_samsum_loss": 1.3408203125,
"eval_samsum_runtime": 13.3091,
"eval_samsum_samples_per_second": 221.427,
"eval_samsum_steps_per_second": 4.658,
"step": 2250
},
{
"epoch": 0.13,
"eval_debate_sum_accuracy": 0.9398914583902843,
"eval_debate_sum_loss": NaN,
"eval_debate_sum_runtime": 244.3103,
"eval_debate_sum_samples_per_second": 196.938,
"eval_debate_sum_steps_per_second": 4.105,
"step": 2250
},
{
"epoch": 0.13,
"eval_billsum_accuracy": 0.6780196221313985,
"eval_billsum_loss": 1.3662109375,
"eval_billsum_runtime": 26.3691,
"eval_billsum_samples_per_second": 143.729,
"eval_billsum_steps_per_second": 2.996,
"step": 2250
},
{
"epoch": 0.13,
"eval_wmt2019_zh-en_accuracy": 0.6364605543710021,
"eval_wmt2019_zh-en_loss": 1.6376953125,
"eval_wmt2019_zh-en_runtime": 14.2747,
"eval_wmt2019_zh-en_samples_per_second": 278.886,
"eval_wmt2019_zh-en_steps_per_second": 5.815,
"step": 2250
},
{
"epoch": 0.13,
"eval_wmt2019_ru-en_accuracy": 0.7199560058060365,
"eval_wmt2019_ru-en_loss": 1.107421875,
"eval_wmt2019_ru-en_runtime": 8.7473,
"eval_wmt2019_ru-en_samples_per_second": 342.965,
"eval_wmt2019_ru-en_steps_per_second": 7.202,
"step": 2250
},
{
"epoch": 0.13,
"eval_wmt2019_de-en_accuracy": 0.7420960891168457,
"eval_wmt2019_de-en_loss": 1.0400390625,
"eval_wmt2019_de-en_runtime": 8.7795,
"eval_wmt2019_de-en_samples_per_second": 341.476,
"eval_wmt2019_de-en_steps_per_second": 7.176,
"step": 2250
},
{
"epoch": 0.13,
"eval_wmt2019_fr-de_accuracy": 0.7250604501209003,
"eval_wmt2019_fr-de_loss": 1.12890625,
"eval_wmt2019_fr-de_runtime": 4.8598,
"eval_wmt2019_fr-de_samples_per_second": 311.121,
"eval_wmt2019_fr-de_steps_per_second": 6.585,
"step": 2250
},
{
"epoch": 0.13,
"eval_essay_instruction_accuracy": 0.5982741083077195,
"eval_essay_instruction_loss": 1.94921875,
"eval_essay_instruction_runtime": 5.5402,
"eval_essay_instruction_samples_per_second": 74.546,
"eval_essay_instruction_steps_per_second": 1.624,
"step": 2250
},
{
"epoch": 0.13,
"eval_reddit_eli5_accuracy": 0.44749512828734445,
"eval_reddit_eli5_loss": 2.541015625,
"eval_reddit_eli5_runtime": 289.5058,
"eval_reddit_eli5_samples_per_second": 188.345,
"eval_reddit_eli5_steps_per_second": 3.924,
"step": 2250
},
{
"epoch": 0.13,
"eval_reddit_askh_accuracy": 0.45043337961716656,
"eval_reddit_askh_loss": 2.63671875,
"eval_reddit_askh_runtime": 112.2629,
"eval_reddit_askh_samples_per_second": 175.525,
"eval_reddit_askh_steps_per_second": 3.661,
"step": 2250
},
{
"epoch": 0.13,
"eval_reddit_asks_accuracy": 0.4590568852017904,
"eval_reddit_asks_loss": 2.490234375,
"eval_reddit_asks_runtime": 152.7985,
"eval_reddit_asks_samples_per_second": 172.489,
"eval_reddit_asks_steps_per_second": 3.6,
"step": 2250
}
],
"max_steps": 34156,
"num_train_epochs": 2,
"total_flos": 1.6587065990940983e+19,
"trial_name": null,
"trial_params": null
}